polars_arrow/array/fixed_size_list/
mod.rs1use super::{Array, ArrayRef, Splitable, new_empty_array, new_null_array};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod ffi;
6pub(super) mod fmt;
7mod iterator;
8
9mod builder;
10pub use builder::*;
11mod mutable;
12pub use mutable::*;
13use polars_error::{PolarsResult, polars_bail, polars_ensure};
14use polars_utils::format_tuple;
15use polars_utils::pl_str::PlSmallStr;
16
17use crate::datatypes::reshape::{Dimension, ReshapeDimension};
18
19#[derive(Clone)]
22pub struct FixedSizeListArray {
23 size: usize, length: usize, dtype: ArrowDataType,
26 values: Box<dyn Array>,
27 validity: Option<Bitmap>,
28}
29
30impl FixedSizeListArray {
31 pub fn try_new(
40 dtype: ArrowDataType,
41 length: usize,
42 values: Box<dyn Array>,
43 validity: Option<Bitmap>,
44 ) -> PolarsResult<Self> {
45 let (child, size) = Self::try_child_and_size(&dtype)?;
46
47 let child_dtype = &child.dtype;
48 let values_dtype = values.dtype();
49 if child_dtype != values_dtype {
50 polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
51 }
52
53 polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
54 "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
55 values.len(),
56 size
57 );
58
59 polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
60 "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
61 values.len() / size,
62 length,
63 );
64 polars_ensure!(size != 0 || values.is_empty(), ComputeError:
65 "zero width FixedSizeListArray has values (length = {}).",
66 values.len(),
67 );
68
69 if validity
70 .as_ref()
71 .is_some_and(|validity| validity.len() != length)
72 {
73 polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
74 }
75
76 Ok(Self {
77 size,
78 length,
79 dtype,
80 values,
81 validity,
82 })
83 }
84
85 #[inline]
86 fn has_invariants(&self) -> bool {
87 let has_valid_length = (self.size == 0 && self.values().is_empty())
88 || (self.size > 0
89 && self.values().len() % self.size() == 0
90 && self.values().len() / self.size() == self.length);
91 let has_valid_validity = self
92 .validity
93 .as_ref()
94 .is_none_or(|v| v.len() == self.length);
95
96 has_valid_length && has_valid_validity
97 }
98
99 #[track_caller]
101 pub fn new(
102 dtype: ArrowDataType,
103 length: usize,
104 values: Box<dyn Array>,
105 validity: Option<Bitmap>,
106 ) -> Self {
107 Self::try_new(dtype, length, values, validity).unwrap()
108 }
109
110 pub const fn size(&self) -> usize {
112 self.size
113 }
114
115 pub fn new_empty(dtype: ArrowDataType) -> Self {
117 let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
118 Self::new(dtype, 0, values, None)
119 }
120
121 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
123 let (field, size) = Self::get_child_and_size(&dtype);
124
125 let values = new_null_array(field.dtype().clone(), length * size);
126 Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
127 }
128
129 pub fn from_shape(
130 leaf_array: ArrayRef,
131 dimensions: &[ReshapeDimension],
132 ) -> PolarsResult<ArrayRef> {
133 polars_ensure!(
134 !dimensions.is_empty(),
135 InvalidOperation: "at least one dimension must be specified"
136 );
137 let size = leaf_array.len();
138
139 let mut total_dim_size = 1;
140 let mut num_infers = 0;
141 for &dim in dimensions {
142 match dim {
143 ReshapeDimension::Infer => num_infers += 1,
144 ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
145 }
146 }
147
148 polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
149
150 if size == 0 {
151 polars_ensure!(
152 num_infers > 0 || total_dim_size == 0,
153 InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
154 format_tuple!(dimensions),
155 );
156
157 let mut prev_arrow_dtype = leaf_array.dtype().clone();
158 let mut prev_array = leaf_array;
159
160 let mut current_length = dimensions[0].get_or_infer(0);
162 let len_iter = dimensions[1..]
163 .iter()
164 .map(|d| {
165 let length = current_length as usize;
166 current_length *= d.get_or_infer(0);
167 length
168 })
169 .collect::<Vec<_>>();
170
171 for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
173 let dim = dim.get_or_infer(0);
175 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
176
177 prev_array =
178 FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
179 .boxed();
180 }
181
182 return Ok(prev_array);
183 }
184
185 polars_ensure!(
186 total_dim_size > 0,
187 InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
188 format_tuple!(dimensions)
189 );
190
191 polars_ensure!(
192 size % total_dim_size == 0,
193 InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
194 );
195
196 let mut prev_arrow_dtype = leaf_array.dtype().clone();
197 let mut prev_array = leaf_array;
198
199 for dim in dimensions[1..].iter().rev() {
201 let dim = dim.get_or_infer((size / total_dim_size) as u64);
203 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
204
205 prev_array = FixedSizeListArray::new(
206 prev_arrow_dtype.clone(),
207 prev_array.len() / dim as usize,
208 prev_array,
209 None,
210 )
211 .boxed();
212 }
213 Ok(prev_array)
214 }
215
216 pub fn get_dims(&self) -> Vec<Dimension> {
217 let mut dims = vec![
218 Dimension::new(self.length as _),
219 Dimension::new(self.size as _),
220 ];
221
222 let mut prev_array = &self.values;
223
224 while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
225 dims.push(Dimension::new(a.size as _));
226 prev_array = &a.values;
227 }
228 dims
229 }
230}
231
232impl FixedSizeListArray {
234 pub fn slice(&mut self, offset: usize, length: usize) {
240 assert!(
241 offset + length <= self.len(),
242 "the offset of the new Buffer cannot exceed the existing length"
243 );
244 unsafe { self.slice_unchecked(offset, length) }
245 }
246
247 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
254 debug_assert!(offset + length <= self.len());
255 self.validity = self
256 .validity
257 .take()
258 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
259 .filter(|bitmap| bitmap.unset_bits() > 0);
260 self.values
261 .slice_unchecked(offset * self.size, length * self.size);
262 self.length = length;
263 }
264
265 impl_sliced!();
266 impl_mut_validity!();
267 impl_into_array!();
268}
269
270impl FixedSizeListArray {
272 #[inline]
274 pub fn len(&self) -> usize {
275 debug_assert!(self.has_invariants());
276 self.length
277 }
278
279 #[inline]
281 pub fn validity(&self) -> Option<&Bitmap> {
282 self.validity.as_ref()
283 }
284
285 pub fn values(&self) -> &Box<dyn Array> {
287 &self.values
288 }
289
290 #[inline]
294 pub fn value(&self, i: usize) -> Box<dyn Array> {
295 self.values.sliced(i * self.size, self.size)
296 }
297
298 #[inline]
303 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
304 self.values.sliced_unchecked(i * self.size, self.size)
305 }
306
307 #[inline]
311 pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
312 if !self.is_null(i) {
313 unsafe { Some(self.value_unchecked(i)) }
315 } else {
316 None
317 }
318 }
319}
320
321impl FixedSizeListArray {
322 pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
323 match dtype.to_logical_type() {
324 ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
325 _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
326 }
327 }
328
329 pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
330 Self::try_child_and_size(dtype).unwrap()
331 }
332
333 pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
335 let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
336 ArrowDataType::FixedSizeList(field, size)
337 }
338}
339
340impl Array for FixedSizeListArray {
341 impl_common_array!();
342
343 fn validity(&self) -> Option<&Bitmap> {
344 self.validity.as_ref()
345 }
346
347 #[inline]
348 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
349 Box::new(self.clone().with_validity(validity))
350 }
351}
352
353impl Splitable for FixedSizeListArray {
354 fn check_bound(&self, offset: usize) -> bool {
355 offset <= self.len()
356 }
357
358 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
359 let (lhs_values, rhs_values) =
360 unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
361 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
362
363 let size = self.size;
364
365 (
366 Self {
367 dtype: self.dtype.clone(),
368 length: offset,
369 values: lhs_values,
370 validity: lhs_validity,
371 size,
372 },
373 Self {
374 dtype: self.dtype.clone(),
375 length: self.length - offset,
376 values: rhs_values,
377 validity: rhs_validity,
378 size,
379 },
380 )
381 }
382}