polars_arrow/array/fixed_size_list/
mod.rs

1use super::{new_empty_array, new_null_array, Array, ArrayRef, Splitable};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod ffi;
6pub(super) mod fmt;
7mod iterator;
8
9mod mutable;
10pub use mutable::*;
11use polars_error::{polars_bail, polars_ensure, PolarsResult};
12use polars_utils::format_tuple;
13use polars_utils::pl_str::PlSmallStr;
14
15use crate::datatypes::reshape::{Dimension, ReshapeDimension};
16
17/// The Arrow's equivalent to an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
18/// Cloning and slicing this struct is `O(1)`.
19#[derive(Clone)]
20pub struct FixedSizeListArray {
21    size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
22    length: usize, // invariant: this is values.len() / size if size > 0
23    dtype: ArrowDataType,
24    values: Box<dyn Array>,
25    validity: Option<Bitmap>,
26}
27
28impl FixedSizeListArray {
29    /// Creates a new [`FixedSizeListArray`].
30    ///
31    /// # Errors
32    /// This function returns an error iff:
33    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`]
34    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
35    /// * The length of `values` is not a multiple of `size` in `dtype`
36    /// * the validity's length is not equal to `values.len() / size`.
37    pub fn try_new(
38        dtype: ArrowDataType,
39        length: usize,
40        values: Box<dyn Array>,
41        validity: Option<Bitmap>,
42    ) -> PolarsResult<Self> {
43        let (child, size) = Self::try_child_and_size(&dtype)?;
44
45        let child_dtype = &child.dtype;
46        let values_dtype = values.dtype();
47        if child_dtype != values_dtype {
48            polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
49        }
50
51        polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
52            "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
53            values.len(),
54            size
55        );
56
57        polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
58            "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
59            values.len() / size,
60            length,
61        );
62        polars_ensure!(size != 0 || values.len() == 0, ComputeError:
63            "zero width FixedSizeListArray has values (length = {}).",
64            values.len(),
65        );
66
67        if validity
68            .as_ref()
69            .is_some_and(|validity| validity.len() != length)
70        {
71            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
72        }
73
74        Ok(Self {
75            size,
76            length,
77            dtype,
78            values,
79            validity,
80        })
81    }
82
83    #[inline]
84    fn has_invariants(&self) -> bool {
85        let has_valid_length = (self.size == 0 && self.values().len() == 0)
86            || (self.size > 0
87                && self.values().len() % self.size() == 0
88                && self.values().len() / self.size() == self.length);
89        let has_valid_validity = self
90            .validity
91            .as_ref()
92            .is_none_or(|v| v.len() == self.length);
93
94        has_valid_length && has_valid_validity
95    }
96
97    /// Alias to `Self::try_new(...).unwrap()`
98    #[track_caller]
99    pub fn new(
100        dtype: ArrowDataType,
101        length: usize,
102        values: Box<dyn Array>,
103        validity: Option<Bitmap>,
104    ) -> Self {
105        Self::try_new(dtype, length, values, validity).unwrap()
106    }
107
108    /// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
109    pub const fn size(&self) -> usize {
110        self.size
111    }
112
113    /// Returns a new empty [`FixedSizeListArray`].
114    pub fn new_empty(dtype: ArrowDataType) -> Self {
115        let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
116        Self::new(dtype, 0, values, None)
117    }
118
119    /// Returns a new null [`FixedSizeListArray`].
120    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
121        let (field, size) = Self::get_child_and_size(&dtype);
122
123        let values = new_null_array(field.dtype().clone(), length * size);
124        Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
125    }
126
127    pub fn from_shape(
128        leaf_array: ArrayRef,
129        dimensions: &[ReshapeDimension],
130    ) -> PolarsResult<ArrayRef> {
131        polars_ensure!(
132            !dimensions.is_empty(),
133            InvalidOperation: "at least one dimension must be specified"
134        );
135        let size = leaf_array.len();
136
137        let mut total_dim_size = 1;
138        let mut num_infers = 0;
139        for &dim in dimensions {
140            match dim {
141                ReshapeDimension::Infer => num_infers += 1,
142                ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
143            }
144        }
145
146        polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
147
148        if size == 0 {
149            polars_ensure!(
150                num_infers > 0 || total_dim_size == 0,
151                InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
152                format_tuple!(dimensions),
153            );
154
155            let mut prev_arrow_dtype = leaf_array.dtype().clone();
156            let mut prev_array = leaf_array;
157
158            // @NOTE: We need to collect the iterator here because it is lazily processed.
159            let mut current_length = dimensions[0].get_or_infer(0);
160            let len_iter = dimensions[1..]
161                .iter()
162                .map(|d| {
163                    let length = current_length as usize;
164                    current_length *= d.get_or_infer(0);
165                    length
166                })
167                .collect::<Vec<_>>();
168
169            // We pop the outer dimension as that is the height of the series.
170            for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
171                // Infer dimension if needed
172                let dim = dim.get_or_infer(0);
173                prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
174
175                prev_array =
176                    FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
177                        .boxed();
178            }
179
180            return Ok(prev_array);
181        }
182
183        polars_ensure!(
184            total_dim_size > 0,
185            InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
186            format_tuple!(dimensions)
187        );
188
189        polars_ensure!(
190            size % total_dim_size == 0,
191            InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
192        );
193
194        let mut prev_arrow_dtype = leaf_array.dtype().clone();
195        let mut prev_array = leaf_array;
196
197        // We pop the outer dimension as that is the height of the series.
198        for dim in dimensions[1..].iter().rev() {
199            // Infer dimension if needed
200            let dim = dim.get_or_infer((size / total_dim_size) as u64);
201            prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
202
203            prev_array = FixedSizeListArray::new(
204                prev_arrow_dtype.clone(),
205                prev_array.len() / dim as usize,
206                prev_array,
207                None,
208            )
209            .boxed();
210        }
211        Ok(prev_array)
212    }
213
214    pub fn get_dims(&self) -> Vec<Dimension> {
215        let mut dims = vec![
216            Dimension::new(self.length as _),
217            Dimension::new(self.size as _),
218        ];
219
220        let mut prev_array = &self.values;
221
222        while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
223            dims.push(Dimension::new(a.size as _));
224            prev_array = &a.values;
225        }
226        dims
227    }
228
229    pub fn propagate_nulls(&self) -> Self {
230        let Some(validity) = self.validity() else {
231            return self.clone();
232        };
233
234        let propagated_validity = if self.size == 1 {
235            validity.clone()
236        } else {
237            Bitmap::from_trusted_len_iter(
238                (0..self.size * validity.len())
239                    .map(|i| unsafe { validity.get_bit_unchecked(i / self.size) }),
240            )
241        };
242
243        let propagated_validity = match self.values.validity() {
244            None => propagated_validity,
245            Some(val) => val & &propagated_validity,
246        };
247        Self::new(
248            self.dtype().clone(),
249            self.length,
250            self.values.with_validity(Some(propagated_validity)),
251            self.validity.clone(),
252        )
253    }
254}
255
256// must use
257impl FixedSizeListArray {
258    /// Slices this [`FixedSizeListArray`].
259    /// # Implementation
260    /// This operation is `O(1)`.
261    /// # Panics
262    /// panics iff `offset + length > self.len()`
263    pub fn slice(&mut self, offset: usize, length: usize) {
264        assert!(
265            offset + length <= self.len(),
266            "the offset of the new Buffer cannot exceed the existing length"
267        );
268        unsafe { self.slice_unchecked(offset, length) }
269    }
270
271    /// Slices this [`FixedSizeListArray`].
272    /// # Implementation
273    /// This operation is `O(1)`.
274    ///
275    /// # Safety
276    /// The caller must ensure that `offset + length <= self.len()`.
277    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
278        debug_assert!(offset + length <= self.len());
279        self.validity = self
280            .validity
281            .take()
282            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
283            .filter(|bitmap| bitmap.unset_bits() > 0);
284        self.values
285            .slice_unchecked(offset * self.size, length * self.size);
286        self.length = length;
287    }
288
289    impl_sliced!();
290    impl_mut_validity!();
291    impl_into_array!();
292}
293
294// accessors
295impl FixedSizeListArray {
296    /// Returns the length of this array
297    #[inline]
298    pub fn len(&self) -> usize {
299        debug_assert!(self.has_invariants());
300        self.length
301    }
302
303    /// The optional validity.
304    #[inline]
305    pub fn validity(&self) -> Option<&Bitmap> {
306        self.validity.as_ref()
307    }
308
309    /// Returns the inner array.
310    pub fn values(&self) -> &Box<dyn Array> {
311        &self.values
312    }
313
314    /// Returns the `Vec<T>` at position `i`.
315    /// # Panic:
316    /// panics iff `i >= self.len()`
317    #[inline]
318    pub fn value(&self, i: usize) -> Box<dyn Array> {
319        self.values.sliced(i * self.size, self.size)
320    }
321
322    /// Returns the `Vec<T>` at position `i`.
323    ///
324    /// # Safety
325    /// Caller must ensure that `i < self.len()`
326    #[inline]
327    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
328        self.values.sliced_unchecked(i * self.size, self.size)
329    }
330
331    /// Returns the element at index `i` or `None` if it is null
332    /// # Panics
333    /// iff `i >= self.len()`
334    #[inline]
335    pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
336        if !self.is_null(i) {
337            // soundness: Array::is_null panics if i >= self.len
338            unsafe { Some(self.value_unchecked(i)) }
339        } else {
340            None
341        }
342    }
343}
344
345impl FixedSizeListArray {
346    pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
347        match dtype.to_logical_type() {
348            ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
349            _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
350        }
351    }
352
353    pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
354        Self::try_child_and_size(dtype).unwrap()
355    }
356
357    /// Returns a [`ArrowDataType`] consistent with [`FixedSizeListArray`].
358    pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
359        let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
360        ArrowDataType::FixedSizeList(field, size)
361    }
362}
363
364impl Array for FixedSizeListArray {
365    impl_common_array!();
366
367    fn validity(&self) -> Option<&Bitmap> {
368        self.validity.as_ref()
369    }
370
371    #[inline]
372    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
373        Box::new(self.clone().with_validity(validity))
374    }
375}
376
377impl Splitable for FixedSizeListArray {
378    fn check_bound(&self, offset: usize) -> bool {
379        offset <= self.len()
380    }
381
382    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
383        let (lhs_values, rhs_values) =
384            unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
385        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
386
387        let size = self.size;
388
389        (
390            Self {
391                dtype: self.dtype.clone(),
392                length: offset,
393                values: lhs_values,
394                validity: lhs_validity,
395                size,
396            },
397            Self {
398                dtype: self.dtype.clone(),
399                length: self.length - offset,
400                values: rhs_values,
401                validity: rhs_validity,
402                size,
403            },
404        )
405    }
406}