arrow_array/array/
struct_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::print_long_array;
19use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
20use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
21use arrow_data::{ArrayData, ArrayDataBuilder};
22use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
23use std::sync::Arc;
24use std::{any::Any, ops::Index};
25
26/// An array of [structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
27///
28/// Each child (called *field*) is represented by a separate array.
29///
30/// # Comparison with [RecordBatch]
31///
32/// Both [`RecordBatch`] and [`StructArray`] represent a collection of columns / arrays with the
33/// same length.
34///
35/// However, there are a couple of key differences:
36///
37/// * [`StructArray`] can be nested within other [`Array`], including itself
38/// * [`RecordBatch`] can contain top-level metadata on its associated [`Schema`][arrow_schema::Schema]
39/// * [`StructArray`] can contain top-level nulls, i.e. `null`
40/// * [`RecordBatch`] can only represent nulls in its child columns, i.e. `{"field": null}`
41///
42/// [`StructArray`] is therefore a more general data container than [`RecordBatch`], and as such
43/// code that needs to handle both will typically share an implementation in terms of
44/// [`StructArray`] and convert to/from [`RecordBatch`] as necessary.
45///
46/// [`From`] implementations are provided to facilitate this conversion, however, converting
47/// from a [`StructArray`] containing top-level nulls to a [`RecordBatch`] will panic, as there
48/// is no way to preserve them.
49///
50/// # Example: Create an array from a vector of fields
51///
52/// ```
53/// use std::sync::Arc;
54/// use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray};
55/// use arrow_schema::{DataType, Field};
56///
57/// let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
58/// let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
59///
60/// let struct_array = StructArray::from(vec![
61///     (
62///         Arc::new(Field::new("b", DataType::Boolean, false)),
63///         boolean.clone() as ArrayRef,
64///     ),
65///     (
66///         Arc::new(Field::new("c", DataType::Int32, false)),
67///         int.clone() as ArrayRef,
68///     ),
69/// ]);
70/// assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
71/// assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
72/// assert_eq!(4, struct_array.len());
73/// assert_eq!(0, struct_array.null_count());
74/// assert_eq!(0, struct_array.offset());
75/// ```
#[derive(Clone)]
pub struct StructArray {
    /// Number of struct slots (rows) in this array
    len: usize,
    /// Data type of this array; the accessors in this file expect it to be a
    /// [`DataType::Struct`] carrying the child fields
    data_type: DataType,
    /// Optional top-level validity, i.e. which struct slots are themselves null
    nulls: Option<NullBuffer>,
    /// Child arrays, one per field
    fields: Vec<ArrayRef>,
}
83
84impl StructArray {
85    /// Create a new [`StructArray`] from the provided parts, panicking on failure
86    ///
87    /// # Panics
88    ///
89    /// Panics if [`Self::try_new`] returns an error
90    pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
91        Self::try_new(fields, arrays, nulls).unwrap()
92    }
93
94    /// Create a new [`StructArray`] from the provided parts, returning an error on failure
95    ///
96    /// # Errors
97    ///
98    /// Errors if
99    ///
100    /// * `fields.len() != arrays.len()`
101    /// * `fields[i].data_type() != arrays[i].data_type()`
102    /// * `arrays[i].len() != arrays[j].len()`
103    /// * `arrays[i].len() != nulls.len()`
104    /// * `!fields[i].is_nullable() && !nulls.contains(arrays[i].nulls())`
105    pub fn try_new(
106        fields: Fields,
107        arrays: Vec<ArrayRef>,
108        nulls: Option<NullBuffer>,
109    ) -> Result<Self, ArrowError> {
110        if fields.len() != arrays.len() {
111            return Err(ArrowError::InvalidArgumentError(format!(
112                "Incorrect number of arrays for StructArray fields, expected {} got {}",
113                fields.len(),
114                arrays.len()
115            )));
116        }
117        let len = arrays.first().map(|x| x.len()).unwrap_or_default();
118
119        if let Some(n) = nulls.as_ref() {
120            if n.len() != len {
121                return Err(ArrowError::InvalidArgumentError(format!(
122                    "Incorrect number of nulls for StructArray, expected {len} got {}",
123                    n.len(),
124                )));
125            }
126        }
127
128        for (f, a) in fields.iter().zip(&arrays) {
129            if f.data_type() != a.data_type() {
130                return Err(ArrowError::InvalidArgumentError(format!(
131                    "Incorrect datatype for StructArray field {:?}, expected {} got {}",
132                    f.name(),
133                    f.data_type(),
134                    a.data_type()
135                )));
136            }
137
138            if a.len() != len {
139                return Err(ArrowError::InvalidArgumentError(format!(
140                    "Incorrect array length for StructArray field {:?}, expected {} got {}",
141                    f.name(),
142                    len,
143                    a.len()
144                )));
145            }
146
147            if !f.is_nullable() {
148                if let Some(a) = a.logical_nulls() {
149                    if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() {
150                        return Err(ArrowError::InvalidArgumentError(format!(
151                            "Found unmasked nulls for non-nullable StructArray field {:?}",
152                            f.name()
153                        )));
154                    }
155                }
156            }
157        }
158
159        Ok(Self {
160            len,
161            data_type: DataType::Struct(fields),
162            nulls: nulls.filter(|n| n.null_count() > 0),
163            fields: arrays,
164        })
165    }
166
167    /// Create a new [`StructArray`] of length `len` where all values are null
168    pub fn new_null(fields: Fields, len: usize) -> Self {
169        let arrays = fields
170            .iter()
171            .map(|f| new_null_array(f.data_type(), len))
172            .collect();
173
174        Self {
175            len,
176            data_type: DataType::Struct(fields),
177            nulls: Some(NullBuffer::new_null(len)),
178            fields: arrays,
179        }
180    }
181
182    /// Create a new [`StructArray`] from the provided parts without validation
183    ///
184    /// # Safety
185    ///
186    /// Safe if [`Self::new`] would not panic with the given arguments
187    pub unsafe fn new_unchecked(
188        fields: Fields,
189        arrays: Vec<ArrayRef>,
190        nulls: Option<NullBuffer>,
191    ) -> Self {
192        if cfg!(feature = "force_validate") {
193            return Self::new(fields, arrays, nulls);
194        }
195
196        let len = arrays.first().map(|x| x.len()).unwrap_or_default();
197        Self {
198            len,
199            data_type: DataType::Struct(fields),
200            nulls,
201            fields: arrays,
202        }
203    }
204
205    /// Create a new [`StructArray`] containing no fields
206    ///
207    /// # Panics
208    ///
209    /// If `len != nulls.len()`
210    pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
211        if let Some(n) = &nulls {
212            assert_eq!(len, n.len())
213        }
214        Self {
215            len,
216            data_type: DataType::Struct(Fields::empty()),
217            fields: vec![],
218            nulls,
219        }
220    }
221
222    /// Deconstruct this array into its constituent parts
223    pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
224        let f = match self.data_type {
225            DataType::Struct(f) => f,
226            _ => unreachable!(),
227        };
228        (f, self.fields, self.nulls)
229    }
230
231    /// Returns the field at `pos`.
232    pub fn column(&self, pos: usize) -> &ArrayRef {
233        &self.fields[pos]
234    }
235
236    /// Return the number of fields in this struct array
237    pub fn num_columns(&self) -> usize {
238        self.fields.len()
239    }
240
241    /// Returns the fields of the struct array
242    pub fn columns(&self) -> &[ArrayRef] {
243        &self.fields
244    }
245
246    /// Return field names in this struct array
247    pub fn column_names(&self) -> Vec<&str> {
248        match self.data_type() {
249            DataType::Struct(fields) => fields
250                .iter()
251                .map(|f| f.name().as_str())
252                .collect::<Vec<&str>>(),
253            _ => unreachable!("Struct array's data type is not struct!"),
254        }
255    }
256
257    /// Returns the [`Fields`] of this [`StructArray`]
258    pub fn fields(&self) -> &Fields {
259        match self.data_type() {
260            DataType::Struct(f) => f,
261            _ => unreachable!(),
262        }
263    }
264
265    /// Return child array whose field name equals to column_name
266    ///
267    /// Note: A schema can currently have duplicate field names, in which case
268    /// the first field will always be selected.
269    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
270    pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
271        self.column_names()
272            .iter()
273            .position(|c| c == &column_name)
274            .map(|pos| self.column(pos))
275    }
276
277    /// Returns a zero-copy slice of this array with the indicated offset and length.
278    pub fn slice(&self, offset: usize, len: usize) -> Self {
279        assert!(
280            offset.saturating_add(len) <= self.len,
281            "the length + offset of the sliced StructArray cannot exceed the existing length"
282        );
283
284        let fields = self.fields.iter().map(|a| a.slice(offset, len)).collect();
285
286        Self {
287            len,
288            data_type: self.data_type.clone(),
289            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
290            fields,
291        }
292    }
293}
294
295impl From<ArrayData> for StructArray {
296    fn from(data: ArrayData) -> Self {
297        let parent_offset = data.offset();
298        let parent_len = data.len();
299
300        let fields = data
301            .child_data()
302            .iter()
303            .map(|cd| {
304                if parent_offset != 0 || parent_len != cd.len() {
305                    make_array(cd.slice(parent_offset, parent_len))
306                } else {
307                    make_array(cd.clone())
308                }
309            })
310            .collect();
311
312        Self {
313            len: data.len(),
314            data_type: data.data_type().clone(),
315            nulls: data.nulls().cloned(),
316            fields,
317        }
318    }
319}
320
321impl From<StructArray> for ArrayData {
322    fn from(array: StructArray) -> Self {
323        let builder = ArrayDataBuilder::new(array.data_type)
324            .len(array.len)
325            .nulls(array.nulls)
326            .child_data(array.fields.iter().map(|x| x.to_data()).collect());
327
328        unsafe { builder.build_unchecked() }
329    }
330}
331
332impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
333    type Error = ArrowError;
334
335    /// builds a StructArray from a vector of names and arrays.
336    fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
337        let (fields, arrays): (Vec<_>, _) = values
338            .into_iter()
339            .map(|(name, array)| {
340                (
341                    Field::new(name, array.data_type().clone(), array.is_nullable()),
342                    array,
343                )
344            })
345            .unzip();
346
347        StructArray::try_new(fields.into(), arrays, None)
348    }
349}
350
351impl Array for StructArray {
352    fn as_any(&self) -> &dyn Any {
353        self
354    }
355
356    fn to_data(&self) -> ArrayData {
357        self.clone().into()
358    }
359
360    fn into_data(self) -> ArrayData {
361        self.into()
362    }
363
364    fn data_type(&self) -> &DataType {
365        &self.data_type
366    }
367
368    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
369        Arc::new(self.slice(offset, length))
370    }
371
372    fn len(&self) -> usize {
373        self.len
374    }
375
376    fn is_empty(&self) -> bool {
377        self.len == 0
378    }
379
380    fn shrink_to_fit(&mut self) {
381        if let Some(nulls) = &mut self.nulls {
382            nulls.shrink_to_fit();
383        }
384        self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
385    }
386
387    fn offset(&self) -> usize {
388        0
389    }
390
391    fn nulls(&self) -> Option<&NullBuffer> {
392        self.nulls.as_ref()
393    }
394
395    fn logical_null_count(&self) -> usize {
396        // More efficient that the default implementation
397        self.null_count()
398    }
399
400    fn get_buffer_memory_size(&self) -> usize {
401        let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
402        if let Some(n) = self.nulls.as_ref() {
403            size += n.buffer().capacity();
404        }
405        size
406    }
407
408    fn get_array_memory_size(&self) -> usize {
409        let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
410        size += std::mem::size_of::<Self>();
411        if let Some(n) = self.nulls.as_ref() {
412            size += n.buffer().capacity();
413        }
414        size
415    }
416}
417
418impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
419    fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
420        let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
421        StructArray::new(fields.into(), arrays, None)
422    }
423}
424
425impl std::fmt::Debug for StructArray {
426    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
427        writeln!(f, "StructArray")?;
428        writeln!(f, "-- validity:")?;
429        writeln!(f, "[")?;
430        print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
431        writeln!(f, "]\n[")?;
432        for (child_index, name) in self.column_names().iter().enumerate() {
433            let column = self.column(child_index);
434            writeln!(
435                f,
436                "-- child {}: \"{}\" ({:?})",
437                child_index,
438                name,
439                column.data_type()
440            )?;
441            std::fmt::Debug::fmt(column, f)?;
442            writeln!(f)?;
443        }
444        write!(f, "]")
445    }
446}
447
448impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
449    fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
450        let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
451        let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
452        let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
453        Self::new(fields.into(), arrays, Some(nulls))
454    }
455}
456
457impl From<RecordBatch> for StructArray {
458    fn from(value: RecordBatch) -> Self {
459        Self {
460            len: value.num_rows(),
461            data_type: DataType::Struct(value.schema().fields().clone()),
462            nulls: None,
463            fields: value.columns().to_vec(),
464        }
465    }
466}
467
468impl Index<&str> for StructArray {
469    type Output = ArrayRef;
470
471    /// Get a reference to a column's array by name.
472    ///
473    /// Note: A schema can currently have duplicate field names, in which case
474    /// the first field will always be selected.
475    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
476    ///
477    /// # Panics
478    ///
479    /// Panics if the name is not in the schema.
480    fn index(&self, name: &str) -> &Self::Output {
481        self.column_by_name(name).unwrap()
482    }
483}
484
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
    use arrow_buffer::ToByteSlice;

    /// Builds a struct from `ArrayData` with two children and checks that both
    /// columns compare equal to the arrays they were built from.
    #[test]
    fn test_struct_array_builder() {
        let boolean_array = BooleanArray::from(vec![false, false, true, true]);
        let int_array = Int64Array::from(vec![42, 28, 19, 31]);

        let fields = vec![
            Field::new("a", DataType::Boolean, false),
            Field::new("b", DataType::Int64, false),
        ];
        let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
            .len(4)
            .add_child_data(boolean_array.to_data())
            .add_child_data(int_array.to_data())
            .build()
            .unwrap();
        let struct_array = StructArray::from(struct_array_data);

        assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
        assert_eq!(struct_array.column(1).as_ref(), &int_array);
    }

    /// Builds a struct from `(FieldRef, ArrayRef)` pairs and checks columns,
    /// length, null count and offset.
    #[test]
    fn test_struct_array_from() {
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                boolean.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                int.clone() as ArrayRef,
            ),
        ]);
        assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
        assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
        assert_eq!(4, struct_array.len());
        assert_eq!(0, struct_array.null_count());
        assert_eq!(0, struct_array.offset());
    }

    /// Checks that three differently laid out `ArrayData` inputs all convert to
    /// the same `StructArray`.
    #[test]
    fn test_struct_array_from_data_with_offset_and_length() {
        // Various ways to make the struct array:
        //
        // [{x: 2}, {x: 3}, None]
        //
        // from slicing larger buffers/arrays with offsets and lengths
        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
        let int_field = Field::new("x", DataType::Int32, false);
        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
        let int_data = int_arr.to_data();
        // Case 1: Offset + length, nulls are not sliced
        let case1 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
            .len(3)
            .offset(1)
            .nulls(Some(struct_nulls))
            .add_child_data(int_data.clone())
            .build()
            .unwrap();

        // Case 2: Offset + length, nulls are sliced
        let struct_nulls =
            NullBuffer::new(BooleanBuffer::from(vec![true, true, true, false, true]).slice(1, 3));
        let case2 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
            .len(3)
            .offset(1)
            .nulls(Some(struct_nulls.clone()))
            .add_child_data(int_data.clone())
            .build()
            .unwrap();

        // Case 3: struct length is smaller than child length but no offset
        let offset_int_data = int_data.slice(1, 4);
        let case3 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
            .len(3)
            .nulls(Some(struct_nulls))
            .add_child_data(offset_int_data)
            .build()
            .unwrap();

        // Reference value: the full five-element struct sliced to elements 1..4
        let expected = StructArray::new(
            Fields::from(vec![int_field.clone()]),
            vec![Arc::new(int_arr)],
            Some(NullBuffer::new(BooleanBuffer::from(vec![
                true, true, true, false, true,
            ]))),
        )
        .slice(1, 3);

        for case in [case1, case2, case3] {
            let struct_arr_from_data = StructArray::from(case);
            assert_eq!(struct_arr_from_data, expected);
            assert_eq!(struct_arr_from_data.column(0), expected.column(0));
        }
    }

    /// Converting must panic when the parent's offset + length reaches past the
    /// end of the child data.
    #[test]
    #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
    fn test_struct_array_from_data_with_offset_and_length_error() {
        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
        let int_field = Field::new("x", DataType::Int32, false);
        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
        let int_data = int_arr.to_data();
        // If parent offset is 3 and len is 3 then child must have 6 items
        let struct_data =
            ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
                .len(3)
                .offset(3)
                .nulls(Some(struct_nulls))
                .add_child_data(int_data)
                .build()
                .unwrap();
        let _ = StructArray::from(struct_data);
    }

    /// validates that struct can be accessed using `column_name` as index i.e. `struct_array["column_name"]`.
    #[test]
    fn test_struct_array_index_access() {
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                boolean.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                int.clone() as ArrayRef,
            ),
        ]);
        assert_eq!(struct_array["b"].as_ref(), boolean.as_ref());
        assert_eq!(struct_array["c"].as_ref(), int.as_ref());
    }

    /// validates that the in-memory representation follows [the spec](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
    #[test]
    fn test_struct_array_from_vec() {
        let strings: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
            Some("mark"),
        ]));
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let arr =
            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();

        let struct_data = arr.into_data();
        assert_eq!(4, struct_data.len());
        assert_eq!(0, struct_data.null_count());

        // Validity 9 = 0b1001: elements 0 and 3 are valid
        let expected_string_data = ArrayData::builder(DataType::Utf8)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
            .add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
            .add_buffer(Buffer::from(b"joemark"))
            .build()
            .unwrap();

        // Validity 11 = 0b1011: elements 0, 1 and 3 are valid
        let expected_int_data = ArrayData::builder(DataType::Int32)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[11_u8])))
            .add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
            .build()
            .unwrap();

        assert_eq!(expected_string_data, struct_data.child_data()[0]);
        assert_eq!(expected_int_data, struct_data.child_data()[1]);
    }

    /// `try_from` must report an error when the child arrays differ in length.
    #[test]
    fn test_struct_array_from_vec_error() {
        let strings: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
            // 3 elements, not 4
        ]));
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
            .unwrap_err()
            .to_string();

        assert_eq!(
            err,
            "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
        )
    }

    /// A single field whose declared type differs from its array's type must panic.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_single() {
        drop(StructArray::from(vec![(
            Arc::new(Field::new("b", DataType::Int16, false)),
            Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
        )]));
    }

    /// With several mismatched fields, the panic names the first one.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_multiple() {
        drop(StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Int16, false)),
                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Utf8, false)),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
            ),
        ]));
    }

    /// Checks validity and values of a struct and its children before and after slicing.
    #[test]
    fn test_struct_array_slice() {
        let boolean_data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .add_buffer(Buffer::from([0b00010000]))
            .null_bit_buffer(Some(Buffer::from([0b00010001])))
            .build()
            .unwrap();
        let int_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
            .null_bit_buffer(Some(Buffer::from([0b00000110])))
            .build()
            .unwrap();

        let field_types = vec![
            Field::new("a", DataType::Boolean, true),
            Field::new("b", DataType::Int32, true),
        ];
        let struct_array_data = ArrayData::builder(DataType::Struct(field_types.into()))
            .len(5)
            .add_child_data(boolean_data.clone())
            .add_child_data(int_data.clone())
            .null_bit_buffer(Some(Buffer::from([0b00010111])))
            .build()
            .unwrap();
        let struct_array = StructArray::from(struct_array_data);

        // Unsliced struct: only element 3 is null
        assert_eq!(5, struct_array.len());
        assert_eq!(1, struct_array.null_count());
        assert!(struct_array.is_valid(0));
        assert!(struct_array.is_valid(1));
        assert!(struct_array.is_valid(2));
        assert!(struct_array.is_null(3));
        assert!(struct_array.is_valid(4));
        assert_eq!(boolean_data, struct_array.column(0).to_data());
        assert_eq!(int_data, struct_array.column(1).to_data());

        let c0 = struct_array.column(0);
        let c0 = c0.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(5, c0.len());
        assert_eq!(3, c0.null_count());
        assert!(c0.is_valid(0));
        assert!(!c0.value(0));
        assert!(c0.is_null(1));
        assert!(c0.is_null(2));
        assert!(c0.is_null(3));
        assert!(c0.is_valid(4));
        assert!(c0.value(4));

        let c1 = struct_array.column(1);
        let c1 = c1.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(5, c1.len());
        assert_eq!(3, c1.null_count());
        assert!(c1.is_null(0));
        assert!(c1.is_valid(1));
        assert_eq!(28, c1.value(1));
        assert!(c1.is_valid(2));
        assert_eq!(42, c1.value(2));
        assert!(c1.is_null(3));
        assert!(c1.is_null(4));

        // Slice covering original elements 2, 3 and 4
        let sliced_array = struct_array.slice(2, 3);
        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(3, sliced_array.len());
        assert_eq!(1, sliced_array.null_count());
        assert!(sliced_array.is_valid(0));
        assert!(sliced_array.is_null(1));
        assert!(sliced_array.is_valid(2));

        let sliced_c0 = sliced_array.column(0);
        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(3, sliced_c0.len());
        assert!(sliced_c0.is_null(0));
        assert!(sliced_c0.is_null(1));
        assert!(sliced_c0.is_valid(2));
        assert!(sliced_c0.value(2));

        let sliced_c1 = sliced_array.column(1);
        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(3, sliced_c1.len());
        assert!(sliced_c1.is_valid(0));
        assert_eq!(42, sliced_c1.value(0));
        assert!(sliced_c1.is_null(1));
        assert!(sliced_c1.is_null(2));
    }

    /// Children of different lengths must be rejected.
    #[test]
    #[should_panic(
        expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
    )]
    fn test_invalid_struct_child_array_lengths() {
        drop(StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Float32, false)),
                Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Float64, false)),
                Arc::new(Float64Array::from(vec![2.2, 3.3])),
            ),
        ]));
    }

    /// A struct built from no fields at all is empty.
    #[test]
    fn test_struct_array_from_empty() {
        let sa = StructArray::from(vec![]);
        assert!(sa.is_empty())
    }

    /// A non-nullable field whose array contains a null must be rejected when
    /// the struct itself has no nulls to mask it.
    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
    fn test_struct_array_from_mismatched_nullability() {
        drop(StructArray::from(vec![(
            Arc::new(Field::new("c", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
        )]));
    }

    /// Pins the exact `Debug` output for a 30-element struct with alternating validity.
    #[test]
    fn test_struct_array_fmt_debug() {
        let arr: StructArray = StructArray::new(
            vec![Arc::new(Field::new("c", DataType::Int32, true))].into(),
            vec![Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef],
            Some(NullBuffer::new(BooleanBuffer::from(
                (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
            ))),
        );
        assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  ...10 elements...,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,\n  ...10 elements...,\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,\n]\n]")
    }
}