polars_arrow/array/fixed_size_list/
mod.rs1use super::{Array, ArrayRef, Splitable, new_empty_array, new_null_array};
2use crate::array::list::LIST_VALUES_NAME;
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5
6mod ffi;
7pub(super) mod fmt;
8mod iterator;
9
10mod builder;
11pub use builder::*;
12mod mutable;
13pub use mutable::*;
14use polars_error::{PolarsResult, polars_bail, polars_ensure};
15use polars_utils::format_tuple;
16#[cfg(feature = "proptest")]
17pub mod proptest;
18
19use crate::datatypes::reshape::{Dimension, ReshapeDimension};
20
21#[derive(Clone)]
24pub struct FixedSizeListArray {
25 size: usize, length: usize, dtype: ArrowDataType,
28 values: Box<dyn Array>,
29 validity: Option<Bitmap>,
30}
31
32impl FixedSizeListArray {
33 pub fn try_new(
42 dtype: ArrowDataType,
43 length: usize,
44 values: Box<dyn Array>,
45 validity: Option<Bitmap>,
46 ) -> PolarsResult<Self> {
47 let (child, size) = Self::try_child_and_size(&dtype)?;
48
49 let child_dtype = &child.dtype;
50 let values_dtype = values.dtype();
51 if child_dtype != values_dtype {
52 polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
53 }
54
55 polars_ensure!(size == 0 || values.len().is_multiple_of(size), ComputeError:
56 "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
57 values.len(),
58 size
59 );
60
61 polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
62 "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
63 values.len() / size,
64 length,
65 );
66 polars_ensure!(size != 0 || values.is_empty(), ComputeError:
67 "zero width FixedSizeListArray has values (length = {}).",
68 values.len(),
69 );
70
71 if validity
72 .as_ref()
73 .is_some_and(|validity| validity.len() != length)
74 {
75 polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
76 }
77
78 Ok(Self {
79 size,
80 length,
81 dtype,
82 values,
83 validity,
84 })
85 }
86
87 #[inline]
88 fn has_invariants(&self) -> bool {
89 let has_valid_length = (self.size == 0 && self.values().is_empty())
90 || (self.size > 0
91 && self.values().len().is_multiple_of(self.size())
92 && self.values().len() / self.size() == self.length);
93 let has_valid_validity = self
94 .validity
95 .as_ref()
96 .is_none_or(|v| v.len() == self.length);
97
98 has_valid_length && has_valid_validity
99 }
100
101 #[track_caller]
103 pub fn new(
104 dtype: ArrowDataType,
105 length: usize,
106 values: Box<dyn Array>,
107 validity: Option<Bitmap>,
108 ) -> Self {
109 Self::try_new(dtype, length, values, validity).unwrap()
110 }
111
112 pub const fn size(&self) -> usize {
114 self.size
115 }
116
117 pub fn new_empty(dtype: ArrowDataType) -> Self {
119 let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
120 Self::new(dtype, 0, values, None)
121 }
122
123 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
125 let (field, size) = Self::get_child_and_size(&dtype);
126
127 let values = new_null_array(field.dtype().clone(), length * size);
128 Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
129 }
130
131 pub fn from_shape(
132 leaf_array: ArrayRef,
133 dimensions: &[ReshapeDimension],
134 ) -> PolarsResult<ArrayRef> {
135 polars_ensure!(
136 !dimensions.is_empty(),
137 InvalidOperation: "at least one dimension must be specified"
138 );
139 let size = leaf_array.len();
140
141 let mut total_dim_size = 1;
142 let mut num_infers = 0;
143 for &dim in dimensions {
144 match dim {
145 ReshapeDimension::Infer => num_infers += 1,
146 ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
147 }
148 }
149
150 polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
151
152 if size == 0 {
153 polars_ensure!(
154 num_infers > 0 || total_dim_size == 0,
155 InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
156 format_tuple!(dimensions),
157 );
158
159 let mut prev_arrow_dtype = leaf_array.dtype().clone();
160 let mut prev_array = leaf_array;
161
162 let mut current_length = dimensions[0].get_or_infer(0);
164 let len_iter = dimensions[1..]
165 .iter()
166 .map(|d| {
167 let length = current_length as usize;
168 current_length *= d.get_or_infer(0);
169 length
170 })
171 .collect::<Vec<_>>();
172
173 for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
175 let dim = dim.get_or_infer(0);
177 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
178
179 prev_array =
180 FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
181 .boxed();
182 }
183
184 return Ok(prev_array);
185 }
186
187 polars_ensure!(
188 total_dim_size > 0,
189 InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
190 format_tuple!(dimensions)
191 );
192
193 polars_ensure!(
194 size.is_multiple_of(total_dim_size),
195 InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
196 );
197
198 let mut prev_arrow_dtype = leaf_array.dtype().clone();
199 let mut prev_array = leaf_array;
200
201 for dim in dimensions[1..].iter().rev() {
203 let dim = dim.get_or_infer((size / total_dim_size) as u64);
205 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
206
207 prev_array = FixedSizeListArray::new(
208 prev_arrow_dtype.clone(),
209 prev_array.len() / dim as usize,
210 prev_array,
211 None,
212 )
213 .boxed();
214 }
215 Ok(prev_array)
216 }
217
218 pub fn get_dims(&self) -> Vec<Dimension> {
219 let mut dims = vec![
220 Dimension::new(self.length as _),
221 Dimension::new(self.size as _),
222 ];
223
224 let mut prev_array = &self.values;
225
226 while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
227 dims.push(Dimension::new(a.size as _));
228 prev_array = &a.values;
229 }
230 dims
231 }
232}
233
234impl FixedSizeListArray {
236 pub fn slice(&mut self, offset: usize, length: usize) {
242 assert!(
243 offset + length <= self.len(),
244 "the offset of the new Buffer cannot exceed the existing length"
245 );
246 unsafe { self.slice_unchecked(offset, length) }
247 }
248
249 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
256 debug_assert!(offset + length <= self.len());
257 self.validity = self
258 .validity
259 .take()
260 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
261 .filter(|bitmap| bitmap.unset_bits() > 0);
262 self.values
263 .slice_unchecked(offset * self.size, length * self.size);
264 self.length = length;
265 }
266
267 impl_sliced!();
268 impl_mut_validity!();
269 impl_into_array!();
270}
271
272impl FixedSizeListArray {
274 #[inline]
276 pub fn len(&self) -> usize {
277 debug_assert!(self.has_invariants());
278 self.length
279 }
280
281 #[inline]
283 pub fn validity(&self) -> Option<&Bitmap> {
284 self.validity.as_ref()
285 }
286
287 pub fn values(&self) -> &Box<dyn Array> {
289 &self.values
290 }
291
292 #[inline]
296 pub fn value(&self, i: usize) -> Box<dyn Array> {
297 self.values.sliced(i * self.size, self.size)
298 }
299
300 #[inline]
305 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
306 self.values.sliced_unchecked(i * self.size, self.size)
307 }
308
309 #[inline]
313 pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
314 if !self.is_null(i) {
315 unsafe { Some(self.value_unchecked(i)) }
317 } else {
318 None
319 }
320 }
321}
322
323impl FixedSizeListArray {
324 pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
325 match dtype.to_logical_type() {
326 ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
327 _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
328 }
329 }
330
331 pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
332 Self::try_child_and_size(dtype).unwrap()
333 }
334
335 pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
337 let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
338 ArrowDataType::FixedSizeList(field, size)
339 }
340}
341
342impl Array for FixedSizeListArray {
343 impl_common_array!();
344
345 fn validity(&self) -> Option<&Bitmap> {
346 self.validity.as_ref()
347 }
348
349 #[inline]
350 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
351 Box::new(self.clone().with_validity(validity))
352 }
353}
354
355impl Splitable for FixedSizeListArray {
356 fn check_bound(&self, offset: usize) -> bool {
357 offset <= self.len()
358 }
359
360 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
361 let (lhs_values, rhs_values) =
362 unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
363 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
364
365 let size = self.size;
366
367 (
368 Self {
369 dtype: self.dtype.clone(),
370 length: offset,
371 values: lhs_values,
372 validity: lhs_validity,
373 size,
374 },
375 Self {
376 dtype: self.dtype.clone(),
377 length: self.length - offset,
378 values: rhs_values,
379 validity: rhs_validity,
380 size,
381 },
382 )
383 }
384}