polars_arrow/array/fixed_size_list/
mod.rs1use super::{new_empty_array, new_null_array, Array, ArrayRef, Splitable};
2use crate::bitmap::Bitmap;
3use crate::datatypes::{ArrowDataType, Field};
4
5mod ffi;
6pub(super) mod fmt;
7mod iterator;
8
9mod mutable;
10pub use mutable::*;
11use polars_error::{polars_bail, polars_ensure, PolarsResult};
12use polars_utils::format_tuple;
13use polars_utils::pl_str::PlSmallStr;
14
15use crate::datatypes::reshape::{Dimension, ReshapeDimension};
16
17#[derive(Clone)]
20pub struct FixedSizeListArray {
21 size: usize, length: usize, dtype: ArrowDataType,
24 values: Box<dyn Array>,
25 validity: Option<Bitmap>,
26}
27
28impl FixedSizeListArray {
29 pub fn try_new(
38 dtype: ArrowDataType,
39 length: usize,
40 values: Box<dyn Array>,
41 validity: Option<Bitmap>,
42 ) -> PolarsResult<Self> {
43 let (child, size) = Self::try_child_and_size(&dtype)?;
44
45 let child_dtype = &child.dtype;
46 let values_dtype = values.dtype();
47 if child_dtype != values_dtype {
48 polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
49 }
50
51 polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
52 "values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
53 values.len(),
54 size
55 );
56
57 polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
58 "length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
59 values.len() / size,
60 length,
61 );
62 polars_ensure!(size != 0 || values.len() == 0, ComputeError:
63 "zero width FixedSizeListArray has values (length = {}).",
64 values.len(),
65 );
66
67 if validity
68 .as_ref()
69 .is_some_and(|validity| validity.len() != length)
70 {
71 polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
72 }
73
74 Ok(Self {
75 size,
76 length,
77 dtype,
78 values,
79 validity,
80 })
81 }
82
83 #[inline]
84 fn has_invariants(&self) -> bool {
85 let has_valid_length = (self.size == 0 && self.values().len() == 0)
86 || (self.size > 0
87 && self.values().len() % self.size() == 0
88 && self.values().len() / self.size() == self.length);
89 let has_valid_validity = self
90 .validity
91 .as_ref()
92 .is_none_or(|v| v.len() == self.length);
93
94 has_valid_length && has_valid_validity
95 }
96
97 #[track_caller]
99 pub fn new(
100 dtype: ArrowDataType,
101 length: usize,
102 values: Box<dyn Array>,
103 validity: Option<Bitmap>,
104 ) -> Self {
105 Self::try_new(dtype, length, values, validity).unwrap()
106 }
107
108 pub const fn size(&self) -> usize {
110 self.size
111 }
112
113 pub fn new_empty(dtype: ArrowDataType) -> Self {
115 let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
116 Self::new(dtype, 0, values, None)
117 }
118
119 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
121 let (field, size) = Self::get_child_and_size(&dtype);
122
123 let values = new_null_array(field.dtype().clone(), length * size);
124 Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
125 }
126
127 pub fn from_shape(
128 leaf_array: ArrayRef,
129 dimensions: &[ReshapeDimension],
130 ) -> PolarsResult<ArrayRef> {
131 polars_ensure!(
132 !dimensions.is_empty(),
133 InvalidOperation: "at least one dimension must be specified"
134 );
135 let size = leaf_array.len();
136
137 let mut total_dim_size = 1;
138 let mut num_infers = 0;
139 for &dim in dimensions {
140 match dim {
141 ReshapeDimension::Infer => num_infers += 1,
142 ReshapeDimension::Specified(dim) => total_dim_size *= dim.get() as usize,
143 }
144 }
145
146 polars_ensure!(num_infers <= 1, InvalidOperation: "can only specify one inferred dimension");
147
148 if size == 0 {
149 polars_ensure!(
150 num_infers > 0 || total_dim_size == 0,
151 InvalidOperation: "cannot reshape empty array into shape without zero dimension: {}",
152 format_tuple!(dimensions),
153 );
154
155 let mut prev_arrow_dtype = leaf_array.dtype().clone();
156 let mut prev_array = leaf_array;
157
158 let mut current_length = dimensions[0].get_or_infer(0);
160 let len_iter = dimensions[1..]
161 .iter()
162 .map(|d| {
163 let length = current_length as usize;
164 current_length *= d.get_or_infer(0);
165 length
166 })
167 .collect::<Vec<_>>();
168
169 for (dim, length) in dimensions[1..].iter().zip(len_iter).rev() {
171 let dim = dim.get_or_infer(0);
173 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
174
175 prev_array =
176 FixedSizeListArray::new(prev_arrow_dtype.clone(), length, prev_array, None)
177 .boxed();
178 }
179
180 return Ok(prev_array);
181 }
182
183 polars_ensure!(
184 total_dim_size > 0,
185 InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}",
186 format_tuple!(dimensions)
187 );
188
189 polars_ensure!(
190 size % total_dim_size == 0,
191 InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dimensions)
192 );
193
194 let mut prev_arrow_dtype = leaf_array.dtype().clone();
195 let mut prev_array = leaf_array;
196
197 for dim in dimensions[1..].iter().rev() {
199 let dim = dim.get_or_infer((size / total_dim_size) as u64);
201 prev_arrow_dtype = prev_arrow_dtype.to_fixed_size_list(dim as usize, true);
202
203 prev_array = FixedSizeListArray::new(
204 prev_arrow_dtype.clone(),
205 prev_array.len() / dim as usize,
206 prev_array,
207 None,
208 )
209 .boxed();
210 }
211 Ok(prev_array)
212 }
213
214 pub fn get_dims(&self) -> Vec<Dimension> {
215 let mut dims = vec![
216 Dimension::new(self.length as _),
217 Dimension::new(self.size as _),
218 ];
219
220 let mut prev_array = &self.values;
221
222 while let Some(a) = prev_array.as_any().downcast_ref::<FixedSizeListArray>() {
223 dims.push(Dimension::new(a.size as _));
224 prev_array = &a.values;
225 }
226 dims
227 }
228
229 pub fn propagate_nulls(&self) -> Self {
230 let Some(validity) = self.validity() else {
231 return self.clone();
232 };
233
234 let propagated_validity = if self.size == 1 {
235 validity.clone()
236 } else {
237 Bitmap::from_trusted_len_iter(
238 (0..self.size * validity.len())
239 .map(|i| unsafe { validity.get_bit_unchecked(i / self.size) }),
240 )
241 };
242
243 let propagated_validity = match self.values.validity() {
244 None => propagated_validity,
245 Some(val) => val & &propagated_validity,
246 };
247 Self::new(
248 self.dtype().clone(),
249 self.length,
250 self.values.with_validity(Some(propagated_validity)),
251 self.validity.clone(),
252 )
253 }
254}
255
256impl FixedSizeListArray {
258 pub fn slice(&mut self, offset: usize, length: usize) {
264 assert!(
265 offset + length <= self.len(),
266 "the offset of the new Buffer cannot exceed the existing length"
267 );
268 unsafe { self.slice_unchecked(offset, length) }
269 }
270
271 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
278 debug_assert!(offset + length <= self.len());
279 self.validity = self
280 .validity
281 .take()
282 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
283 .filter(|bitmap| bitmap.unset_bits() > 0);
284 self.values
285 .slice_unchecked(offset * self.size, length * self.size);
286 self.length = length;
287 }
288
289 impl_sliced!();
290 impl_mut_validity!();
291 impl_into_array!();
292}
293
294impl FixedSizeListArray {
296 #[inline]
298 pub fn len(&self) -> usize {
299 debug_assert!(self.has_invariants());
300 self.length
301 }
302
303 #[inline]
305 pub fn validity(&self) -> Option<&Bitmap> {
306 self.validity.as_ref()
307 }
308
309 pub fn values(&self) -> &Box<dyn Array> {
311 &self.values
312 }
313
314 #[inline]
318 pub fn value(&self, i: usize) -> Box<dyn Array> {
319 self.values.sliced(i * self.size, self.size)
320 }
321
322 #[inline]
327 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
328 self.values.sliced_unchecked(i * self.size, self.size)
329 }
330
331 #[inline]
335 pub fn get(&self, i: usize) -> Option<Box<dyn Array>> {
336 if !self.is_null(i) {
337 unsafe { Some(self.value_unchecked(i)) }
339 } else {
340 None
341 }
342 }
343}
344
345impl FixedSizeListArray {
346 pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
347 match dtype.to_logical_type() {
348 ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
349 _ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
350 }
351 }
352
353 pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) {
354 Self::try_child_and_size(dtype).unwrap()
355 }
356
357 pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType {
359 let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
360 ArrowDataType::FixedSizeList(field, size)
361 }
362}
363
364impl Array for FixedSizeListArray {
365 impl_common_array!();
366
367 fn validity(&self) -> Option<&Bitmap> {
368 self.validity.as_ref()
369 }
370
371 #[inline]
372 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
373 Box::new(self.clone().with_validity(validity))
374 }
375}
376
377impl Splitable for FixedSizeListArray {
378 fn check_bound(&self, offset: usize) -> bool {
379 offset <= self.len()
380 }
381
382 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
383 let (lhs_values, rhs_values) =
384 unsafe { self.values.split_at_boxed_unchecked(offset * self.size) };
385 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
386
387 let size = self.size;
388
389 (
390 Self {
391 dtype: self.dtype.clone(),
392 length: offset,
393 values: lhs_values,
394 validity: lhs_validity,
395 size,
396 },
397 Self {
398 dtype: self.dtype.clone(),
399 length: self.length - offset,
400 values: rhs_values,
401 validity: rhs_validity,
402 size,
403 },
404 )
405 }
406}