polars_arrow/array/fixed_size_list/
mod.rs

1use super::{Array, ArrayRef, Splitable, new_empty_array, new_null_array};
2use crate::array::list::LIST_VALUES_NAME;
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5
6mod ffi;
7pub(super) mod fmt;
8mod iterator;
9
10mod builder;
11pub use builder::*;
12mod mutable;
13pub use mutable::*;
14use polars_error::{PolarsResult, polars_bail, polars_ensure};
15use polars_utils::format_tuple;
16#[cfg(feature = "proptest")]
17pub mod proptest;
18
19use crate::datatypes::reshape::{Dimension, ReshapeDimension};
20
/// Arrow's equivalent of an immutable `Vec<Option<[T; size]>>`, where `T` is an Arrow type.
///
/// Every slot holds exactly `size` child elements; the elements of all slots are stored
/// in a single child array (`values`).
/// Cloning and slicing this struct is `O(1)`.
#[derive(Clone)]
pub struct FixedSizeListArray {
    size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
    // Number of slots. When `size == 0` the child array is empty, so this value cannot be
    // derived from `values` and has to be tracked explicitly.
    length: usize, // invariant: this is values.len() / size if size > 0
    // Data type of the array; `try_new` requires its logical type to be `FixedSizeList`.
    dtype: ArrowDataType,
    // Child elements of all slots; slot `i` starts at child index `i * size`.
    values: Box<dyn Array>,
    // Optional per-slot validity; when present its length equals `length`.
    validity: Option<Bitmap>,
}
31
32impl FixedSizeListArray {
33    /// Creates a new [`FixedSizeListArray`].
34    ///
35    /// # Errors
36    /// This function returns an error iff:
37    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`]
38    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
39    /// * The length of `values` is not a multiple of `size` in `dtype`
40    /// * the validity's length is not equal to `values.len() / size`.
41    pub fn try_new(
42        dtype: ArrowDataType,
43        length: usize,
44        values: Box<dyn Array>,
45        validity: Option<Bitmap>,
46    ) -> PolarsResult<Self> {
47        let (child, size) = Self::try_child_and_size(&dtype)?;
48
49        let child_dtype = &child.dtype;
50        let values_dtype = values.dtype();
51        if child_dtype != values_dtype {
52            polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
53        }
54
55        polars_ensure!(size == 0 || values.len().is_multiple_of(size), ComputeError:
56            "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
57            values.len(),
58            size
59        );
60
61        polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
62            "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
63            values.len() / size,
64            length,
65        );
66        polars_ensure!(size != 0 || values.is_empty(), ComputeError:
67            "zero width FixedSizeListArray has values (length = {}).",
68            values.len(),
69        );
70
71        if validity
72            .as_ref()
73            .is_some_and(|validity| validity.len() != length)
74        {
75            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
76        }
77
78        Ok(Self {
79            size,
80            length,
81            dtype,
82            values,
83            validity,
84        })
85    }
86
87    #[inline]
88    fn has_invariants(&self) -> bool {
89        let has_valid_length = (self.size == 0 && self.values().is_empty())
90            || (self.size > 0
91                && self.values().len().is_multiple_of(self.size())
92                && self.values().len() / self.size() == self.length);
93        let has_valid_validity = self
94            .validity
95            .as_ref()
96            .is_none_or(|v| v.len() == self.length);
97
98        has_valid_length && has_valid_validity
99    }
100
    /// Alias to `Self::try_new(...).unwrap()`
    ///
    /// # Panics
    /// Panics if [`Self::try_new`] returns an error for the given arguments.
    /// Thanks to `#[track_caller]` the panic is attributed to the caller of `new`.
    #[track_caller]
    pub fn new(
        dtype: ArrowDataType,
        length: usize,
        values: Box<dyn Array>,
        validity: Option<Bitmap>,
    ) -> Self {
        Self::try_new(dtype, length, values, validity).unwrap()
    }
111
    /// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
    ///
    /// A size of `0` is possible; `try_new` only accepts it together with an empty
    /// child array.
    pub const fn size(&self) -> usize {
        self.size
    }
116
117    /// Returns a new empty [`FixedSizeListArray`].
118    pub fn new_empty(dtype: ArrowDataType) -> Self {
119        let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
120        Self::new(dtype, 0, values, None)
121    }
122
123    /// Returns a new null [`FixedSizeListArray`].
124    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
125        let (field, size) = Self::get_child_and_size(&dtype);
126
127        let values = new_null_array(field.dtype().clone(), length * size);
128        Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
129    }
130
    /// Wraps `leaf_array` into nested [`FixedSizeListArray`]s according to `dimensions`.
    ///
    /// `dimensions[0]` is the outer dimension (the height of the result) and does not
    /// produce a nesting level of its own. Every following dimension becomes one level
    /// of fixed-size-list nesting, the last dimension being the innermost one. At most
    /// one dimension may be [`ReshapeDimension::Infer`]. When a single dimension is
    /// given and the checks below pass, `leaf_array` is returned unchanged.
    ///
    /// # Errors
    /// Returns an `InvalidOperation` error if:
    /// * `dimensions` is empty
    /// * more than one dimension is to be inferred
    /// * `leaf_array` is empty and the shape has neither an inferred nor a zero dimension
    /// * `leaf_array` is non-empty and the shape contains a zero dimension
    /// * `leaf_array` is non-empty and its length is not a multiple of the product of
    ///   the specified dimensions
    ///
    /// # Panics
    /// The nested arrays are built with [`FixedSizeListArray::new`], which panics if
    /// its own validation fails.
    pub fn from_shape(
        leaf_array: ArrayRef,
        dimensions: &[ReshapeDimension],
    ) -> PolarsResult<ArrayRef> {
        polars_ensure!(
            !dimensions.is_empty(),
            InvalidOperation: "at least one dimension must be specified"
        );
        let size = leaf_array.len();

        // Product of all explicitly specified dimensions, and the number of dimensions
        // that are left to be inferred.
        // NOTE(review): the product is accumulated with a plain `*=`, i.e. without an
        // explicit overflow check — confirm callers bound the dimensions.
        let mut total_dim_size = 1;
        let mut num_infers = 0;
        for &dim in dimensions {
            match dim {
                ReshapeDimension::Infer => num_infers += 1,
                ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
            }
        }

        polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");

        // Empty input: an inferred dimension is resolved to 0 throughout this branch.
        if size == 0 {
            polars_ensure!(
                num_infers > 0 || total_dim_size == 0,
                InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
                format_tuple!(dimensions),
            );

            let mut prev_arrow_dtype = leaf_array.dtype().clone();
            let mut prev_array = leaf_array;

            // For every nesting level, record the running product of the dimensions
            // before it; this is used below as the slot count of that level.
            // @NOTE: We need to collect the iterator here because it is lazily processed.
            // (The closure mutates `current_length`, so it must run front to back
            // before the reversed traversal below.)
            let mut current_length = dimensions[0].get_or_infer(0);
            let len_iter = dimensions[1..]
                .iter()
                .map(|d| {
                    let length = current_length as usize;
                    current_length *= d.get_or_infer(0);
                    length
                })
                .collect::<Vec<_>>();

            // We pop the outer dimension as that is the height of the series.
            // Levels are built innermost first, hence the reversed iteration.
            for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
                // Infer dimension if needed
                let dim = dim.get_or_infer(0);
                prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);

                prev_array =
                    FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
                        .boxed();
            }

            return Ok(prev_array);
        }

        // Non-empty input: a zero dimension can never hold the existing elements.
        polars_ensure!(
            total_dim_size > 0,
            InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
            format_tuple!(dimensions)
        );

        polars_ensure!(
            size.is_multiple_of(total_dim_size),
            InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
        );

        let mut prev_arrow_dtype = leaf_array.dtype().clone();
        let mut prev_array = leaf_array;

        // We pop the outer dimension as that is the height of the series.
        // Levels are built innermost first, hence the reversed iteration.
        for dim in dimensions[1..].iter().rev() {
            // Infer dimension if needed
            // An inferred dimension takes whatever factor of `size` the specified
            // dimensions leave over.
            let dim = dim.get_or_infer((size / total_dim_size) as u64);
            prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);

            // Each new level groups `dim` elements of the previous level into one slot.
            prev_array = FixedSizeListArray::new(
                prev_arrow_dtype.clone(),
                prev_array.len() / dim as usize,
                prev_array,
                None,
            )
            .boxed();
        }
        Ok(prev_array)
    }
217
218    pub fn get_dims(&self) -> Vec<Dimension> {
219        let mut dims = vec![
220            Dimension::new(self.length as _),
221            Dimension::new(self.size as _),
222        ];
223
224        let mut prev_array = &self.values;
225
226        while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
227            dims.push(Dimension::new(a.size as _));
228            prev_array = &a.values;
229        }
230        dims
231    }
232}
233
234// must use
235impl FixedSizeListArray {
236    /// Slices this [`FixedSizeListArray`].
237    /// # Implementation
238    /// This operation is `O(1)`.
239    /// # Panics
240    /// panics iff `offset + length > self.len()`
241    pub fn slice(&mut self, offset: usize, length: usize) {
242        assert!(
243            offset + length <= self.len(),
244            "the offset of the new Buffer cannot exceed the existing length"
245        );
246        unsafe { self.slice_unchecked(offset, length) }
247    }
248
    /// Slices this [`FixedSizeListArray`] in place to the `length` slots starting at
    /// `offset`, without checking bounds in release builds.
    /// # Implementation
    /// This operation is `O(1)`.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        debug_assert!(offset + length <= self.len());
        // Slice the validity to the same window and drop it entirely when the window
        // contains no unset bits.
        self.validity = self
            .validity
            .take()
            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
            .filter(|bitmap| bitmap.unset_bits() > 0);
        // Each slot spans `size` child elements, so slot positions are scaled by `size`.
        self.values
            .slice_unchecked(offset * self.size, length * self.size);
        self.length = length;
    }
266
267    impl_sliced!();
268    impl_mut_validity!();
269    impl_into_array!();
270}
271
272// accessors
273impl FixedSizeListArray {
    /// Returns the length of this array, i.e. its number of slots.
    ///
    /// In debug builds this additionally asserts the invariants checked by
    /// `has_invariants`.
    #[inline]
    pub fn len(&self) -> usize {
        debug_assert!(self.has_invariants());
        self.length
    }
280
    /// The optional validity.
    ///
    /// Returns `None` when the array carries no validity bitmap.
    #[inline]
    pub fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }
286
    /// Returns the inner array, which holds the child elements of all slots.
    ///
    /// Slot `i` starts at index `i * self.size()` of the returned array.
    pub fn values(&self) -> &Box<dyn Array> {
        &self.values
    }
291
    /// Returns the `Vec<T>` at position `i`, i.e. the `size` child elements starting at
    /// index `i * size` of the inner array.
    /// # Panics
    /// panics iff `i >= self.len()`
    // NOTE(review): with a zero `size` the requested child range is always empty, so
    // the bound on `i` is presumably not enforced in that case — confirm against
    // `Array::sliced`.
    #[inline]
    pub fn value(&self, i: usize) -> Box<dyn Array> {
        self.values.sliced(i * self.size, self.size)
    }
299
    /// Returns the `Vec<T>` at position `i`, i.e. the `size` child elements starting at
    /// index `i * size` of the inner array, without bounds checking.
    ///
    /// # Safety
    /// Caller must ensure that `i < self.len()`
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
        self.values.sliced_unchecked(i * self.size, self.size)
    }
308
309    /// Returns the element at index `i` or `None` if it is null
310    /// # Panics
311    /// iff `i >= self.len()`
312    #[inline]
313    pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
314        if !self.is_null(i) {
315            // soundness: Array::is_null panics if i >= self.len
316            unsafe { Some(self.value_unchecked(i)) }
317        } else {
318            None
319        }
320    }
321}
322
323impl FixedSizeListArray {
324    pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
325        match dtype.to_logical_type() {
326            ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
327            _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
328        }
329    }
330
    /// Panicking variant of [`Self::try_child_and_size`]: returns the child field and
    /// the width of a fixed-size-list data type.
    ///
    /// # Panics
    /// Panics if [`Self::try_child_and_size`] returns an error for `dtype`.
    pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
        Self::try_child_and_size(dtype).unwrap()
    }
334
335    /// Returns a [`ArrowDataType`] consistent with [`FixedSizeListArray`].
336    pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
337        let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
338        ArrowDataType::FixedSizeList(field, size)
339    }
340}
341
// `Array` trait implementation for `FixedSizeListArray`.
impl Array for FixedSizeListArray {
    // The remaining trait items come from this macro, which is defined outside this
    // file section.
    impl_common_array!();

    /// Returns the optional validity bitmap of this array.
    fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    /// Returns a boxed clone of this array whose validity is replaced by `validity`.
    // NOTE(review): `self.clone().with_validity(..)` presumably resolves to the
    // inherent, by-value `with_validity` (likely generated by `impl_mut_validity!`)
    // rather than recursing into this trait method — confirm.
    #[inline]
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        Box::new(self.clone().with_validity(validity))
    }
}
354
355impl Splitable for FixedSizeListArray {
    /// Returns `true` when `offset` is a valid split position, i.e. `offset <= self.len()`.
    /// Splitting at `self.len()` is allowed.
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }
359
360    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
361        let (lhs_values, rhs_values) =
362            unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
363        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
364
365        let size = self.size;
366
367        (
368            Self {
369                dtype: self.dtype.clone(),
370                length: offset,
371                values: lhs_values,
372                validity: lhs_validity,
373                size,
374            },
375            Self {
376                dtype: self.dtype.clone(),
377                length: self.length - offset,
378                values: rhs_values,
379                validity: rhs_validity,
380                size,
381            },
382        )
383    }
384}