polars_arrow/array/fixed_size_list/
mod.rs

1use super::{Array, ArrayRef, Splitable, new_empty_array, new_null_array};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod ffi;
6pub(super) mod fmt;
7mod iterator;
8
9mod builder;
10pub use builder::*;
11mod mutable;
12pub use mutable::*;
13use polars_error::{PolarsResult, polars_bail, polars_ensure};
14use polars_utils::format_tuple;
15use polars_utils::pl_str::PlSmallStr;
16
17use crate::datatypes::reshape::{Dimension, ReshapeDimension};
18
/// The Arrow's equivalent to an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Clone)]
pub struct FixedSizeListArray {
    /// Number of child elements per slot. This is redundant with `dtype`, but useful to not
    /// have to deconstruct the dtype.
    size: usize,
    /// Number of slots. Invariant: this is `values.len() / size` if `size > 0`.
    length: usize,
    /// Data type of the array; `try_new` only accepts a dtype whose logical type is
    /// `ArrowDataType::FixedSizeList`.
    dtype: ArrowDataType,
    /// Flattened child elements of all slots.
    values: Box<dyn Array>,
    /// Optional slot-level validity; `try_new` requires it to have `length` bits when present.
    validity: Option<Bitmap>,
}
29
30impl FixedSizeListArray {
31    /// Creates a new [`FixedSizeListArray`].
32    ///
33    /// # Errors
34    /// This function returns an error iff:
35    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`]
36    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
37    /// * The length of `values` is not a multiple of `size` in `dtype`
38    /// * the validity's length is not equal to `values.len() / size`.
39    pub fn try_new(
40        dtype: ArrowDataType,
41        length: usize,
42        values: Box<dyn Array>,
43        validity: Option<Bitmap>,
44    ) -> PolarsResult<Self> {
45        let (child, size) = Self::try_child_and_size(&dtype)?;
46
47        let child_dtype = &child.dtype;
48        let values_dtype = values.dtype();
49        if child_dtype != values_dtype {
50            polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
51        }
52
53        polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
54            "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
55            values.len(),
56            size
57        );
58
59        polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
60            "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
61            values.len() / size,
62            length,
63        );
64        polars_ensure!(size != 0 || values.is_empty(), ComputeError:
65            "zero width FixedSizeListArray has values (length = {}).",
66            values.len(),
67        );
68
69        if validity
70            .as_ref()
71            .is_some_and(|validity| validity.len() != length)
72        {
73            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
74        }
75
76        Ok(Self {
77            size,
78            length,
79            dtype,
80            values,
81            validity,
82        })
83    }
84
85    #[inline]
86    fn has_invariants(&self) -> bool {
87        let has_valid_length = (self.size == 0 && self.values().is_empty())
88            || (self.size > 0
89                && self.values().len() % self.size() == 0
90                && self.values().len() / self.size() == self.length);
91        let has_valid_validity = self
92            .validity
93            .as_ref()
94            .is_none_or(|v| v.len() == self.length);
95
96        has_valid_length && has_valid_validity
97    }
98
    /// Alias to `Self::try_new(...).unwrap()`
    ///
    /// # Panics
    /// Panics whenever [`FixedSizeListArray::try_new`] would return an error for the same
    /// arguments.
    #[track_caller]
    pub fn new(
        dtype: ArrowDataType,
        length: usize,
        values: Box<dyn Array>,
        validity: Option<Bitmap>,
    ) -> Self {
        Self::try_new(dtype, length, values, validity).unwrap()
    }
109
    /// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
    ///
    /// This reads the cached `size` field rather than deconstructing the data type.
    pub const fn size(&self) -> usize {
        self.size
    }
114
115    /// Returns a new empty [`FixedSizeListArray`].
116    pub fn new_empty(dtype: ArrowDataType) -> Self {
117        let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
118        Self::new(dtype, 0, values, None)
119    }
120
121    /// Returns a new null [`FixedSizeListArray`].
122    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
123        let (field, size) = Self::get_child_and_size(&dtype);
124
125        let values = new_null_array(field.dtype().clone(), length * size);
126        Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
127    }
128
129    pub fn from_shape(
130        leaf_array: ArrayRef,
131        dimensions: &[ReshapeDimension],
132    ) -> PolarsResult<ArrayRef> {
133        polars_ensure!(
134            !dimensions.is_empty(),
135            InvalidOperation: "at least one dimension must be specified"
136        );
137        let size = leaf_array.len();
138
139        let mut total_dim_size = 1;
140        let mut num_infers = 0;
141        for &dim in dimensions {
142            match dim {
143                ReshapeDimension::Infer => num_infers += 1,
144                ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
145            }
146        }
147
148        polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
149
150        if size == 0 {
151            polars_ensure!(
152                num_infers > 0 || total_dim_size == 0,
153                InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
154                format_tuple!(dimensions),
155            );
156
157            let mut prev_arrow_dtype = leaf_array.dtype().clone();
158            let mut prev_array = leaf_array;
159
160            // @NOTE: We need to collect the iterator here because it is lazily processed.
161            let mut current_length = dimensions[0].get_or_infer(0);
162            let len_iter = dimensions[1..]
163                .iter()
164                .map(|d| {
165                    let length = current_length as usize;
166                    current_length *= d.get_or_infer(0);
167                    length
168                })
169                .collect::<Vec<_>>();
170
171            // We pop the outer dimension as that is the height of the series.
172            for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
173                // Infer dimension if needed
174                let dim = dim.get_or_infer(0);
175                prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
176
177                prev_array =
178                    FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
179                        .boxed();
180            }
181
182            return Ok(prev_array);
183        }
184
185        polars_ensure!(
186            total_dim_size > 0,
187            InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
188            format_tuple!(dimensions)
189        );
190
191        polars_ensure!(
192            size % total_dim_size == 0,
193            InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
194        );
195
196        let mut prev_arrow_dtype = leaf_array.dtype().clone();
197        let mut prev_array = leaf_array;
198
199        // We pop the outer dimension as that is the height of the series.
200        for dim in dimensions[1..].iter().rev() {
201            // Infer dimension if needed
202            let dim = dim.get_or_infer((size / total_dim_size) as u64);
203            prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
204
205            prev_array = FixedSizeListArray::new(
206                prev_arrow_dtype.clone(),
207                prev_array.len() / dim as usize,
208                prev_array,
209                None,
210            )
211            .boxed();
212        }
213        Ok(prev_array)
214    }
215
216    pub fn get_dims(&self) -> Vec<Dimension> {
217        let mut dims = vec![
218            Dimension::new(self.length as _),
219            Dimension::new(self.size as _),
220        ];
221
222        let mut prev_array = &self.values;
223
224        while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
225            dims.push(Dimension::new(a.size as _));
226            prev_array = &a.values;
227        }
228        dims
229    }
230}
231
232// must use
233impl FixedSizeListArray {
234    /// Slices this [`FixedSizeListArray`].
235    /// # Implementation
236    /// This operation is `O(1)`.
237    /// # Panics
238    /// panics iff `offset + length > self.len()`
239    pub fn slice(&mut self, offset: usize, length: usize) {
240        assert!(
241            offset + length <= self.len(),
242            "the offset of the new Buffer cannot exceed the existing length"
243        );
244        unsafe { self.slice_unchecked(offset, length) }
245    }
246
247    /// Slices this [`FixedSizeListArray`].
248    /// # Implementation
249    /// This operation is `O(1)`.
250    ///
251    /// # Safety
252    /// The caller must ensure that `offset + length <= self.len()`.
253    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
254        debug_assert!(offset + length <= self.len());
255        self.validity = self
256            .validity
257            .take()
258            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
259            .filter(|bitmap| bitmap.unset_bits() > 0);
260        self.values
261            .slice_unchecked(offset * self.size, length * self.size);
262        self.length = length;
263    }
264
265    impl_sliced!();
266    impl_mut_validity!();
267    impl_into_array!();
268}
269
270// accessors
271impl FixedSizeListArray {
    /// Returns the length of this array, i.e. the number of slots.
    ///
    /// In builds with debug assertions enabled this also asserts the structural invariants
    /// of the array.
    #[inline]
    pub fn len(&self) -> usize {
        debug_assert!(self.has_invariants());
        self.length
    }
278
    /// The optional validity.
    ///
    /// Returns `None` when the array carries no validity bitmap.
    #[inline]
    pub fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }
284
    /// Returns the inner array, which holds the flattened child elements of all slots.
    pub fn values(&self) -> &Box<dyn Array> {
        &self.values
    }
289
290    /// Returns the `Vec<T>` at position `i`.
291    /// # Panic:
292    /// panics iff `i >= self.len()`
293    #[inline]
294    pub fn value(&self, i: usize) -> Box<dyn Array> {
295        self.values.sliced(i * self.size, self.size)
296    }
297
298    /// Returns the `Vec<T>` at position `i`.
299    ///
300    /// # Safety
301    /// Caller must ensure that `i < self.len()`
302    #[inline]
303    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
304        self.values.sliced_unchecked(i * self.size, self.size)
305    }
306
307    /// Returns the element at index `i` or `None` if it is null
308    /// # Panics
309    /// iff `i >= self.len()`
310    #[inline]
311    pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
312        if !self.is_null(i) {
313            // soundness: Array::is_null panics if i >= self.len
314            unsafe { Some(self.value_unchecked(i)) }
315        } else {
316            None
317        }
318    }
319}
320
321impl FixedSizeListArray {
322    pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
323        match dtype.to_logical_type() {
324            ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
325            _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
326        }
327    }
328
    /// Extracts the inner field and the size from `dtype`.
    ///
    /// # Panics
    /// Panics whenever [`FixedSizeListArray::try_child_and_size`] would return an error.
    pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
        Self::try_child_and_size(dtype).unwrap()
    }
332
333    /// Returns a [`ArrowDataType`] consistent with [`FixedSizeListArray`].
334    pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
335        let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
336        ArrowDataType::FixedSizeList(field, size)
337    }
338}
339
340impl Array for FixedSizeListArray {
341    impl_common_array!();
342
343    fn validity(&self) -> Option<&Bitmap> {
344        self.validity.as_ref()
345    }
346
347    #[inline]
348    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
349        Box::new(self.clone().with_validity(validity))
350    }
351}
352
353impl Splitable for FixedSizeListArray {
354    fn check_bound(&self, offset: usize) -> bool {
355        offset <= self.len()
356    }
357
358    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
359        let (lhs_values, rhs_values) =
360            unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
361        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
362
363        let size = self.size;
364
365        (
366            Self {
367                dtype: self.dtype.clone(),
368                length: offset,
369                values: lhs_values,
370                validity: lhs_validity,
371                size,
372            },
373            Self {
374                dtype: self.dtype.clone(),
375                length: self.length - offset,
376                values: rhs_values,
377                validity: rhs_validity,
378                size,
379            },
380        )
381    }
382}