polars_arrow/array/struct_/
mod.rs1use super::{Array, Splitable, new_empty_array, new_null_array};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod builder;
6pub use builder::*;
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10use polars_error::{PolarsResult, polars_bail, polars_ensure};
11#[cfg(feature = "proptest")]
12pub mod proptest;
13
/// An array whose rows are structs: one child array per field, all sharing a
/// single row count, plus an optional validity bitmap for the rows themselves.
#[derive(Clone)]
pub struct StructArray {
    /// Data type of the array. `try_new` only accepts a type whose logical
    /// type is `ArrowDataType::Struct`.
    dtype: ArrowDataType,
    /// One child array per struct field, in the same order as the fields.
    values: Vec<Box<dyn Array>>,
    /// Number of rows; `try_new` requires every child in `values` to have this length.
    length: usize,
    /// Optional validity bitmap; when present, `try_new` requires its length
    /// to equal `length`.
    validity: Option<Bitmap>,
}
39
40impl StructArray {
41 pub fn try_new(
51 dtype: ArrowDataType,
52 length: usize,
53 values: Vec<Box<dyn Array>>,
54 validity: Option<Bitmap>,
55 ) -> PolarsResult<Self> {
56 let fields = Self::try_get_fields(&dtype)?;
57
58 polars_ensure!(
59 fields.len() == values.len(),
60 ComputeError:
61 "a StructArray must have a number of fields in its DataType equal to the number of child values"
62 );
63
64 fields
65 .iter().map(|a| &a.dtype)
66 .zip(values.iter().map(|a| a.dtype()))
67 .enumerate()
68 .try_for_each(|(index, (dtype, child))| {
69 if dtype != child {
70 polars_bail!(ComputeError:
71 "The children DataTypes of a StructArray must equal the children data types.
72 However, the field {index} has data type {dtype:?} but the value has data type {child:?}"
73 )
74 } else {
75 Ok(())
76 }
77 })?;
78
79 values
80 .iter()
81 .map(|f| f.len())
82 .enumerate()
83 .try_for_each(|(index, f_length)| {
84 if f_length != length {
85 polars_bail!(ComputeError: "The children must have the given number of values.
86 However, the values at index {index} have a length of {f_length}, which is different from given length {length}.")
87 } else {
88 Ok(())
89 }
90 })?;
91
92 if validity
93 .as_ref()
94 .is_some_and(|validity| validity.len() != length)
95 {
96 polars_bail!(ComputeError:"The validity length of a StructArray must match its number of elements")
97 }
98
99 Ok(Self {
100 dtype,
101 length,
102 values,
103 validity,
104 })
105 }
106
107 pub fn new(
117 dtype: ArrowDataType,
118 length: usize,
119 values: Vec<Box<dyn Array>>,
120 validity: Option<Bitmap>,
121 ) -> Self {
122 Self::try_new(dtype, length, values, validity).unwrap()
123 }
124
125 pub fn new_empty(dtype: ArrowDataType) -> Self {
127 if let ArrowDataType::Struct(fields) = &dtype.to_logical_type() {
128 let values = fields
129 .iter()
130 .map(|field| new_empty_array(field.dtype().clone()))
131 .collect();
132 Self::new(dtype, 0, values, None)
133 } else {
134 panic!("StructArray must be initialized with DataType::Struct");
135 }
136 }
137
138 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
140 if let ArrowDataType::Struct(fields) = &dtype {
141 let values = fields
142 .iter()
143 .map(|field| new_null_array(field.dtype().clone(), length))
144 .collect();
145 Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
146 } else {
147 panic!("StructArray must be initialized with DataType::Struct");
148 }
149 }
150}
151
152impl StructArray {
154 #[must_use]
156 pub fn into_data(self) -> (Vec<Field>, usize, Vec<Box<dyn Array>>, Option<Bitmap>) {
157 let Self {
158 dtype,
159 length,
160 values,
161 validity,
162 } = self;
163 let fields = if let ArrowDataType::Struct(fields) = dtype {
164 fields
165 } else {
166 unreachable!()
167 };
168 (fields, length, values, validity)
169 }
170
171 pub fn slice(&mut self, offset: usize, length: usize) {
177 assert!(
178 offset + length <= self.len(),
179 "offset + length may not exceed length of array"
180 );
181 unsafe { self.slice_unchecked(offset, length) }
182 }
183
184 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
191 self.validity = self
192 .validity
193 .take()
194 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
195 .filter(|bitmap| bitmap.unset_bits() > 0);
196 self.values
197 .iter_mut()
198 .for_each(|x| x.slice_unchecked(offset, length));
199 self.length = length;
200 }
201
202 impl_sliced!();
203
204 impl_mut_validity!();
205
206 impl_into_array!();
207}
208
209impl StructArray {
211 #[inline]
212 pub fn len(&self) -> usize {
213 if cfg!(debug_assertions) {
214 for arr in self.values.iter() {
215 assert_eq!(
216 arr.len(),
217 self.length,
218 "StructArray invariant: each array has same length"
219 );
220 }
221 }
222
223 self.length
224 }
225
226 #[inline]
228 pub fn validity(&self) -> Option<&Bitmap> {
229 self.validity.as_ref()
230 }
231
232 pub fn values(&self) -> &[Box<dyn Array>] {
234 &self.values
235 }
236
237 pub fn fields(&self) -> &[Field] {
239 let fields = Self::get_fields(&self.dtype);
240 debug_assert_eq!(self.values().len(), fields.len());
241 fields
242 }
243}
244
245impl StructArray {
246 pub(crate) fn try_get_fields(dtype: &ArrowDataType) -> PolarsResult<&[Field]> {
248 match dtype.to_logical_type() {
249 ArrowDataType::Struct(fields) => Ok(fields),
250 _ => {
251 polars_bail!(ComputeError: "Struct array must be created with a DataType whose physical type is Struct")
252 },
253 }
254 }
255
256 pub fn get_fields(dtype: &ArrowDataType) -> &[Field] {
258 Self::try_get_fields(dtype).unwrap()
259 }
260}
261
262impl Array for StructArray {
263 impl_common_array!();
264
265 fn validity(&self) -> Option<&Bitmap> {
266 self.validity.as_ref()
267 }
268
269 #[inline]
270 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
271 Box::new(self.clone().with_validity(validity))
272 }
273}
274
275impl Splitable for StructArray {
276 fn check_bound(&self, offset: usize) -> bool {
277 offset <= self.len()
278 }
279
280 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
281 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
282
283 let mut lhs_values = Vec::with_capacity(self.values.len());
284 let mut rhs_values = Vec::with_capacity(self.values.len());
285
286 for v in self.values.iter() {
287 let (lhs, rhs) = unsafe { v.split_at_boxed_unchecked(offset) };
288 lhs_values.push(lhs);
289 rhs_values.push(rhs);
290 }
291
292 (
293 Self {
294 dtype: self.dtype.clone(),
295 length: offset,
296 values: lhs_values,
297 validity: lhs_validity,
298 },
299 Self {
300 dtype: self.dtype.clone(),
301 length: self.length - offset,
302 values: rhs_values,
303 validity: rhs_validity,
304 },
305 )
306 }
307}