Skip to main content

arrow_array/builder/
fixed_size_list_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::{ArrayRef, FixedSizeListArray};
20use arrow_buffer::NullBufferBuilder;
21use arrow_schema::{Field, FieldRef};
22use std::any::Any;
23use std::sync::Arc;
24
25///  Builder for [`FixedSizeListArray`]
26/// ```
27/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array, Int32Array};
28/// let values_builder = Int32Builder::new();
29/// let mut builder = FixedSizeListBuilder::new(values_builder, 3);
30///
31/// //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
32/// builder.values().append_value(0);
33/// builder.values().append_value(1);
34/// builder.values().append_value(2);
35/// builder.append(true);
36/// builder.values().append_null();
37/// builder.values().append_null();
38/// builder.values().append_null();
39/// builder.append(false);
40/// builder.values().append_value(3);
41/// builder.values().append_null();
42/// builder.values().append_value(5);
43/// builder.append(true);
44/// builder.values().append_value(6);
45/// builder.values().append_value(7);
46/// builder.values().append_null();
47/// builder.append(true);
48/// let list_array = builder.finish();
49/// assert_eq!(
50///     *list_array.value(0),
51///     Int32Array::from(vec![Some(0), Some(1), Some(2)])
52/// );
53/// assert!(list_array.is_null(1));
54/// assert_eq!(
55///     *list_array.value(2),
56///     Int32Array::from(vec![Some(3), None, Some(5)])
57/// );
58/// assert_eq!(
59///     *list_array.value(3),
60///     Int32Array::from(vec![Some(6), Some(7), None])
61/// )
62/// ```
63///
64#[derive(Debug)]
65pub struct FixedSizeListBuilder<T: ArrayBuilder> {
66    null_buffer_builder: NullBufferBuilder,
67    values_builder: T,
68    list_len: i32,
69    field: Option<FieldRef>,
70}
71
72impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
73    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
74    /// `value_length` is the number of values within each array
75    pub fn new(values_builder: T, value_length: i32) -> Self {
76        let capacity = values_builder
77            .len()
78            .checked_div(value_length as _)
79            .unwrap_or_default();
80
81        Self::with_capacity(values_builder, value_length, capacity)
82    }
83
84    /// Creates a new [`FixedSizeListBuilder`] from a given values array builder
85    /// `value_length` is the number of values within each array
86    /// `capacity` is the number of items to pre-allocate space for in this builder
87    pub fn with_capacity(values_builder: T, value_length: i32, capacity: usize) -> Self {
88        Self {
89            null_buffer_builder: NullBufferBuilder::new(capacity),
90            values_builder,
91            list_len: value_length,
92            field: None,
93        }
94    }
95
96    /// Override the field passed to [`FixedSizeListArray::new`]
97    ///
98    /// By default, a nullable field is created with the name `item`
99    ///
100    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
101    /// field's data type does not match that of `T`
102    pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
103        Self {
104            field: Some(field.into()),
105            ..self
106        }
107    }
108}
109
110impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
111where
112    T: 'static,
113{
114    /// Returns the builder as a non-mutable `Any` reference.
115    fn as_any(&self) -> &dyn Any {
116        self
117    }
118
119    /// Returns the builder as a mutable `Any` reference.
120    fn as_any_mut(&mut self) -> &mut dyn Any {
121        self
122    }
123
124    /// Returns the boxed builder as a box of `Any`.
125    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
126        self
127    }
128
129    /// Returns the number of array slots in the builder
130    fn len(&self) -> usize {
131        self.null_buffer_builder.len()
132    }
133
134    /// Builds the array and reset this builder.
135    fn finish(&mut self) -> ArrayRef {
136        Arc::new(self.finish())
137    }
138
139    /// Builds the array without resetting the builder.
140    fn finish_cloned(&self) -> ArrayRef {
141        Arc::new(self.finish_cloned())
142    }
143
144    fn finish_preserve_values(&mut self) -> ArrayRef {
145        Arc::new(self.finish_preserve_values())
146    }
147}
148
149impl<T: ArrayBuilder> FixedSizeListBuilder<T>
150where
151    T: 'static,
152{
153    /// Returns the child array builder as a mutable reference.
154    ///
155    /// This mutable reference can be used to append values into the child array builder,
156    /// but you must call [`append`](#method.append) to delimit each distinct list value.
157    pub fn values(&mut self) -> &mut T {
158        &mut self.values_builder
159    }
160
161    /// Returns the length of the list
162    pub fn value_length(&self) -> i32 {
163        self.list_len
164    }
165
166    /// Finish the current fixed-length list array slot
167    #[inline]
168    pub fn append(&mut self, is_valid: bool) {
169        self.null_buffer_builder.append(is_valid);
170    }
171
172    /// Builds the [`FixedSizeListBuilder`] and reset this builder.
173    pub fn finish(&mut self) -> FixedSizeListArray {
174        let len = self.len();
175        let values = self.values_builder.finish();
176        let nulls = self.null_buffer_builder.finish();
177
178        assert_eq!(
179            values.len(),
180            len * self.list_len as usize,
181            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
182            values.len(),
183            self.list_len,
184            len,
185        );
186
187        let field = self
188            .field
189            .clone()
190            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
191
192        FixedSizeListArray::new(field, self.list_len, values, nulls)
193    }
194
195    /// Builds the [`FixedSizeListBuilder`] without resetting the builder.
196    pub fn finish_cloned(&self) -> FixedSizeListArray {
197        let len = self.len();
198        let values = self.values_builder.finish_cloned();
199        let nulls = self.null_buffer_builder.finish_cloned();
200
201        assert_eq!(
202            values.len(),
203            len * self.list_len as usize,
204            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
205            values.len(),
206            self.list_len,
207            len,
208        );
209
210        let field = self
211            .field
212            .clone()
213            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
214
215        FixedSizeListArray::new(field, self.list_len, values, nulls)
216    }
217
218    fn finish_preserve_values(&mut self) -> FixedSizeListArray {
219        let len = self.len();
220        let values = self.values_builder.finish_preserve_values();
221        let nulls = self.null_buffer_builder.finish();
222
223        assert_eq!(
224            values.len(),
225            len * self.list_len as usize,
226            "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
227            values.len(),
228            self.list_len,
229            len,
230        );
231
232        let field = self
233            .field
234            .clone()
235            .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true)));
236
237        FixedSizeListArray::new(field, self.list_len, values, nulls)
238    }
239
240    /// Returns the current null buffer as a slice
241    pub fn validity_slice(&self) -> Option<&[u8]> {
242        self.null_buffer_builder.as_slice()
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::DataType;

    use crate::Array;
    use crate::Int32Array;
    use crate::builder::{Int32Builder, tests::PreserveValuesMock};

    /// Builds a four-slot builder of length-3 `Int32` lists:
    ///
    /// * slot 0: `[0, 1, 2]`
    /// * slot 1: `[2, 3, 4]`
    /// * slot 2: a null list (with null children) if `include_null_element`,
    ///   otherwise `[2, 3, 4]`
    /// * slot 3: `[3, null, 5]` if `include_null_in_values`, otherwise `[3, 4, 5]`
    fn make_list_builder(
        include_null_element: bool,
        include_null_in_values: bool,
    ) -> FixedSizeListBuilder<Int32Builder> {
        let values_builder = Int32Builder::new();
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);

        builder.values().append_value(2);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.append(true);

        if include_null_element {
            // A null list slot still needs `value_length` child entries.
            builder.values().append_null();
            builder.values().append_null();
            builder.values().append_null();
            builder.append(false);
        } else {
            builder.values().append_value(2);
            builder.values().append_value(3);
            builder.values().append_value(4);
            builder.append(true);
        }

        builder.values().append_value(3);
        if include_null_in_values {
            // An unmasked null child: the enclosing list slot itself is valid.
            builder.values().append_null();
        } else {
            builder.values().append_value(4);
        }
        builder.values().append_value(5);
        builder.append(true);

        builder
    }

    /// Asserts the shape shared by every array produced from `make_list_builder`:
    /// four slots of three `Int32` values each, with `expected_null_count` null slots.
    fn assert_four_int32_triples(list_array: &FixedSizeListArray, expected_null_count: usize) {
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(expected_null_count, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    /// `finish` with the default field and both a null slot and a null child.
    #[test]
    fn test_fixed_size_list_array_builder() {
        let mut builder = make_list_builder(true, true);

        let list_array = builder.finish();

        assert_four_int32_triples(&list_array, 1);
    }

    /// `finish` with a non-nullable child field and no nulls at all.
    #[test]
    fn test_fixed_size_list_array_builder_with_field() {
        let builder = make_list_builder(false, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_four_int32_triples(&list_array, 0);
    }

    /// `finish` with a non-nullable child field where the only null children
    /// are masked by a null list slot.
    #[test]
    fn test_fixed_size_list_array_builder_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        let list_array = builder.finish();

        assert_four_int32_triples(&list_array, 1);
    }

    /// `finish` must reject an unmasked null child under a non-nullable field.
    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish();
    }

    /// `finish` must reject a field whose data type differs from the child values.
    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_with_field_type_panic() {
        let values_builder = Int32Builder::new();
        let builder = FixedSizeListBuilder::new(values_builder, 3);
        let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        //  [[0, 1, 2], null, [3, 4, 5]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);

        builder.finish();
    }

    /// `finish_cloned` with a nullable child field, a null slot and a null child.
    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));

        let list_array = builder.finish_cloned();

        assert_four_int32_triples(&list_array, 1);
    }

    /// `finish_cloned` must reject an unmasked null child under a non-nullable field.
    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
    fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
        let builder = make_list_builder(true, true);
        let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

        builder.finish_cloned();
    }

    /// `finish_cloned` with a non-nullable child field where the only null
    /// children are masked by a null list slot.
    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
        let builder = make_list_builder(true, false);
        let builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
        // Exercise the cloning path; the `finish` path has its own test above.
        let list_array = builder.finish_cloned();

        assert_four_int32_triples(&list_array, 1);
    }

    /// `finish_cloned` must reject a field whose data type differs from the child values.
    #[test]
    #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
    fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
        let builder = make_list_builder(false, false);
        let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

        builder.finish_cloned();
    }

    /// `finish_cloned` leaves the builder intact, so further appends are
    /// visible in a later `finish`.
    #[test]
    fn test_fixed_size_list_array_builder_finish_cloned() {
        let mut builder = make_list_builder(true, true);

        let mut list_array = builder.finish_cloned();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(4, list_array.len());
        assert_eq!(1, list_array.null_count());
        assert_eq!(3, list_array.value_length());

        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.values().append_null();
        builder.append(true);
        builder.values().append_null();
        builder.values().append_null();
        builder.values().append_null();
        builder.append(false);
        list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(6, list_array.len());
        assert_eq!(2, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
    }

    /// An empty builder with a custom field finishes into an empty array.
    #[test]
    fn test_fixed_size_list_array_builder_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    /// An empty builder with a custom field clones into an empty array.
    #[test]
    fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
        let values_builder = Int32Array::builder(0);
        let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
            "list_item",
            DataType::Int32,
            false,
        ));
        assert!(builder.is_empty());
        let arr = builder.finish_cloned();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    /// An empty builder with the default field finishes into an empty array.
    #[test]
    fn test_fixed_size_list_array_builder_empty() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
        assert!(builder.is_empty());
        let arr = builder.finish();
        assert_eq!(0, arr.len());
        assert_eq!(0, builder.len());
    }

    /// `finish` resets the builder so it can be reused for a fresh array.
    #[test]
    fn test_fixed_size_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert_eq!(0, builder.len());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert_eq!(0, builder.len());
    }

    /// `finish` panics when the child value count does not match
    /// slots × value length (10 values for 3 slots of length 3).
    #[test]
    #[should_panic(
        expected = "Length of the child array (10) must be the multiple of the value length (3) and the array length (3)."
    )]
    fn test_fixed_size_list_array_builder_fail() {
        let values_builder = Int32Array::builder(5);
        let mut builder = FixedSizeListBuilder::new(values_builder, 3);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);
        builder.values().append_slice(&[7, 8, 9, 10]);
        builder.append(true);

        builder.finish();
    }

    /// `finish_preserve_values` forwards to the child builder's
    /// `finish_preserve_values` exactly once.
    #[test]
    fn test_finish_preserve_values() {
        let mut builder = FixedSizeListBuilder::new(PreserveValuesMock::default(), 2);

        builder.values().inner.append_value(0);
        builder.values().inner.append_value(1);
        builder.append(true);

        let arr = builder.finish_preserve_values();

        assert_eq!(1, arr.len());
        assert_eq!(1, builder.values().called);
    }
}