polars_arrow/array/struct_/
mod.rs

1use super::{Array, Splitable, new_empty_array, new_null_array};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod builder;
6pub use builder::*;
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10use polars_error::{PolarsResult, polars_bail, polars_ensure};
11#[cfg(feature = "proptest")]
12pub mod proptest;
13
14/// A [`StructArray`] is a nested [`Array`] with an optional validity representing
15/// multiple [`Array`] with the same number of rows.
16/// # Example
17/// ```
18/// use polars_arrow::array::*;
19/// use polars_arrow::datatypes::*;
20/// let boolean = BooleanArray::from_slice(&[false, false, true, true]).boxed();
21/// let int = Int32Array::from_slice(&[42, 28, 19, 31]).boxed();
22///
23/// let fields = vec![
24///     Field::new("b".into(), ArrowDataType::Boolean, false),
25///     Field::new("c".into(), ArrowDataType::Int32, false),
26/// ];
27///
28/// let array = StructArray::new(ArrowDataType::Struct(fields), 4, vec![boolean, int], None);
29/// ```
30#[derive(Clone)]
31pub struct StructArray {
32    dtype: ArrowDataType,
33    // invariant: each array has the same length
34    values: Vec<Box<dyn Array>>,
35    // invariant: for each v in values: length == v.len()
36    length: usize,
37    validity: Option<Bitmap>,
38}
39
40impl StructArray {
41    /// Returns a new [`StructArray`].
42    /// # Errors
43    /// This function errors iff:
44    /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].
45    /// * the children of `dtype` are empty
46    /// * the values's len is different from children's length
47    /// * any of the values's data type is different from its corresponding children' data type
48    /// * any element of values has a different length than the first element
49    /// * the validity's length is not equal to the length of the first element
50    pub fn try_new(
51        dtype: ArrowDataType,
52        length: usize,
53        values: Vec<Box<dyn Array>>,
54        validity: Option<Bitmap>,
55    ) -> PolarsResult<Self> {
56        let fields = Self::try_get_fields(&dtype)?;
57
58        polars_ensure!(
59            fields.len() == values.len(),
60            ComputeError:
61                "a StructArray must have a number of fields in its DataType equal to the number of child values"
62        );
63
64        fields
65            .iter().map(|a| &a.dtype)
66            .zip(values.iter().map(|a| a.dtype()))
67            .enumerate()
68            .try_for_each(|(index, (dtype, child))| {
69                if dtype != child {
70                    polars_bail!(ComputeError:
71                        "The children DataTypes of a StructArray must equal the children data types.
72                         However, the field {index} has data type {dtype:?} but the value has data type {child:?}"
73                    )
74                } else {
75                    Ok(())
76                }
77            })?;
78
79        values
80            .iter()
81            .map(|f| f.len())
82            .enumerate()
83            .try_for_each(|(index, f_length)| {
84                if f_length != length {
85                    polars_bail!(ComputeError: "The children must have the given number of values.
86                         However, the values at index {index} have a length of {f_length}, which is different from given length {length}.")
87                } else {
88                    Ok(())
89                }
90            })?;
91
92        if validity
93            .as_ref()
94            .is_some_and(|validity| validity.len() != length)
95        {
96            polars_bail!(ComputeError:"The validity length of a StructArray must match its number of elements")
97        }
98
99        Ok(Self {
100            dtype,
101            length,
102            values,
103            validity,
104        })
105    }
106
107    /// Returns a new [`StructArray`]
108    /// # Panics
109    /// This function panics iff:
110    /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].
111    /// * the children of `dtype` are empty
112    /// * the values's len is different from children's length
113    /// * any of the values's data type is different from its corresponding children' data type
114    /// * any element of values has a different length than the first element
115    /// * the validity's length is not equal to the length of the first element
116    pub fn new(
117        dtype: ArrowDataType,
118        length: usize,
119        values: Vec<Box<dyn Array>>,
120        validity: Option<Bitmap>,
121    ) -> Self {
122        Self::try_new(dtype, length, values, validity).unwrap()
123    }
124
125    /// Creates an empty [`StructArray`].
126    pub fn new_empty(dtype: ArrowDataType) -> Self {
127        if let ArrowDataType::Struct(fields) = &dtype.to_logical_type() {
128            let values = fields
129                .iter()
130                .map(|field| new_empty_array(field.dtype().clone()))
131                .collect();
132            Self::new(dtype, 0, values, None)
133        } else {
134            panic!("StructArray must be initialized with DataType::Struct");
135        }
136    }
137
138    /// Creates a null [`StructArray`] of length `length`.
139    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
140        if let ArrowDataType::Struct(fields) = &dtype {
141            let values = fields
142                .iter()
143                .map(|field| new_null_array(field.dtype().clone(), length))
144                .collect();
145            Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
146        } else {
147            panic!("StructArray must be initialized with DataType::Struct");
148        }
149    }
150}
151
152// must use
153impl StructArray {
154    /// Deconstructs the [`StructArray`] into its individual components.
155    #[must_use]
156    pub fn into_data(self) -> (Vec<Field>, usize, Vec<Box<dyn Array>>, Option<Bitmap>) {
157        let Self {
158            dtype,
159            length,
160            values,
161            validity,
162        } = self;
163        let fields = if let ArrowDataType::Struct(fields) = dtype {
164            fields
165        } else {
166            unreachable!()
167        };
168        (fields, length, values, validity)
169    }
170
171    /// Slices this [`StructArray`].
172    /// # Panics
173    /// panics iff `offset + length > self.len()`
174    /// # Implementation
175    /// This operation is `O(F)` where `F` is the number of fields.
176    pub fn slice(&mut self, offset: usize, length: usize) {
177        assert!(
178            offset + length <= self.len(),
179            "offset + length may not exceed length of array"
180        );
181        unsafe { self.slice_unchecked(offset, length) }
182    }
183
184    /// Slices this [`StructArray`].
185    /// # Implementation
186    /// This operation is `O(F)` where `F` is the number of fields.
187    ///
188    /// # Safety
189    /// The caller must ensure that `offset + length <= self.len()`.
190    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
191        self.validity = self
192            .validity
193            .take()
194            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
195            .filter(|bitmap| bitmap.unset_bits() > 0);
196        self.values
197            .iter_mut()
198            .for_each(|x| x.slice_unchecked(offset, length));
199        self.length = length;
200    }
201
202    impl_sliced!();
203
204    impl_mut_validity!();
205
206    impl_into_array!();
207}
208
209// Accessors
210impl StructArray {
211    #[inline]
212    pub fn len(&self) -> usize {
213        if cfg!(debug_assertions) {
214            for arr in self.values.iter() {
215                assert_eq!(
216                    arr.len(),
217                    self.length,
218                    "StructArray invariant: each array has same length"
219                );
220            }
221        }
222
223        self.length
224    }
225
226    /// The optional validity.
227    #[inline]
228    pub fn validity(&self) -> Option<&Bitmap> {
229        self.validity.as_ref()
230    }
231
232    /// Returns the values of this [`StructArray`].
233    pub fn values(&self) -> &[Box<dyn Array>] {
234        &self.values
235    }
236
237    /// Returns the fields of this [`StructArray`].
238    pub fn fields(&self) -> &[Field] {
239        let fields = Self::get_fields(&self.dtype);
240        debug_assert_eq!(self.values().len(), fields.len());
241        fields
242    }
243}
244
245impl StructArray {
246    /// Returns the fields the `DataType::Struct`.
247    pub(crate) fn try_get_fields(dtype: &ArrowDataType) -> PolarsResult<&[Field]> {
248        match dtype.to_logical_type() {
249            ArrowDataType::Struct(fields) => Ok(fields),
250            _ => {
251                polars_bail!(ComputeError: "Struct array must be created with a DataType whose physical type is Struct")
252            },
253        }
254    }
255
256    /// Returns the fields the `DataType::Struct`.
257    pub fn get_fields(dtype: &ArrowDataType) -> &[Field] {
258        Self::try_get_fields(dtype).unwrap()
259    }
260}
261
262impl Array for StructArray {
263    impl_common_array!();
264
265    fn validity(&self) -> Option<&Bitmap> {
266        self.validity.as_ref()
267    }
268
269    #[inline]
270    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
271        Box::new(self.clone().with_validity(validity))
272    }
273}
274
275impl Splitable for StructArray {
276    fn check_bound(&self, offset: usize) -> bool {
277        offset <= self.len()
278    }
279
280    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
281        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
282
283        let mut lhs_values = Vec::with_capacity(self.values.len());
284        let mut rhs_values = Vec::with_capacity(self.values.len());
285
286        for v in self.values.iter() {
287            let (lhs, rhs) = unsafe { v.split_at_boxed_unchecked(offset) };
288            lhs_values.push(lhs);
289            rhs_values.push(rhs);
290        }
291
292        (
293            Self {
294                dtype: self.dtype.clone(),
295                length: offset,
296                values: lhs_values,
297                validity: lhs_validity,
298            },
299            Self {
300                dtype: self.dtype.clone(),
301                length: self.length - offset,
302                values: rhs_values,
303                validity: rhs_validity,
304            },
305        )
306    }
307}