polars_arrow/array/list/
mod.rs

1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17#[cfg(feature = "proptest")]
18pub mod proptest;
19
/// Name used for the values array within List/FixedSizeList arrays.
///
/// In this file it is the name given to the inner field built by
/// `ListArray::default_datatype`.
pub const LIST_VALUES_NAME: PlSmallStr = PlSmallStr::from_static("item");
22
/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory
/// representation.
///
/// The invariants listed on the fields below are the ones checked by [`ListArray::try_new`].
#[derive(Clone)]
pub struct ListArray<O: Offset> {
    // Data type of the array; its logical type must be `List` (32-bit offsets) or `LargeList`
    // (64-bit offsets), matching `O`, and its inner field's dtype must equal `values.dtype()`.
    dtype: ArrowDataType,
    // Offsets into `values`; entry `i` and `i + 1` delimit the `i`-th list.
    offsets: OffsetsBuffer<O>,
    // The child array that every list is a slice of.
    values: Box<dyn Array>,
    // Optional validity bitmap; when present its length equals `offsets.len_proxy()`.
    validity: Option<Bitmap>,
}
31
32impl<O: Offset> ListArray<O> {
33    /// Creates a new [`ListArray`].
34    ///
35    /// # Errors
36    /// This function returns an error iff:
37    /// * `offsets.last()` is greater than `values.len()`.
38    /// * the validity's length is not equal to `offsets.len_proxy()`.
39    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
40    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
41    /// # Implementation
42    /// This function is `O(1)`
43    pub fn try_new(
44        dtype: ArrowDataType,
45        offsets: OffsetsBuffer<O>,
46        values: Box<dyn Array>,
47        validity: Option<Bitmap>,
48    ) -> PolarsResult<Self> {
49        try_check_offsets_bounds(&offsets, values.len())?;
50
51        if validity
52            .as_ref()
53            .is_some_and(|validity| validity.len() != offsets.len_proxy())
54        {
55            polars_bail!(ComputeError: "validity mask length must match the number of values")
56        }
57
58        let child_dtype = Self::try_get_child(&dtype)?.dtype();
59        let values_dtype = values.dtype();
60        if child_dtype != values_dtype {
61            polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
62        }
63
64        Ok(Self {
65            dtype,
66            offsets,
67            values,
68            validity,
69        })
70    }
71
72    /// Creates a new [`ListArray`].
73    ///
74    /// # Panics
75    /// This function panics iff:
76    /// * `offsets.last()` is greater than `values.len()`.
77    /// * the validity's length is not equal to `offsets.len_proxy()`.
78    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
79    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
80    /// # Implementation
81    /// This function is `O(1)`
82    pub fn new(
83        dtype: ArrowDataType,
84        offsets: OffsetsBuffer<O>,
85        values: Box<dyn Array>,
86        validity: Option<Bitmap>,
87    ) -> Self {
88        Self::try_new(dtype, offsets, values, validity).unwrap()
89    }
90
91    /// Returns a new empty [`ListArray`].
92    pub fn new_empty(dtype: ArrowDataType) -> Self {
93        let values = new_empty_array(Self::get_child_type(&dtype).clone());
94        Self::new(dtype, OffsetsBuffer::default(), values, None)
95    }
96
97    /// Returns a new null [`ListArray`].
98    #[inline]
99    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
100        let child = Self::get_child_type(&dtype).clone();
101        Self::new(
102            dtype,
103            Offsets::new_zeroed(length).into(),
104            new_empty_array(child),
105            Some(Bitmap::new_zeroed(length)),
106        )
107    }
108}
109
110impl<O: Offset> ListArray<O> {
111    /// Slices this [`ListArray`].
112    /// # Panics
113    /// panics iff `offset + length > self.len()`
114    pub fn slice(&mut self, offset: usize, length: usize) {
115        assert!(
116            offset + length <= self.len(),
117            "the offset of the new Buffer cannot exceed the existing length"
118        );
119        unsafe { self.slice_unchecked(offset, length) }
120    }
121
122    /// Slices this [`ListArray`].
123    ///
124    /// # Safety
125    /// The caller must ensure that `offset + length < self.len()`.
126    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
127        self.validity = self
128            .validity
129            .take()
130            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
131            .filter(|bitmap| bitmap.unset_bits() > 0);
132        self.offsets.slice_unchecked(offset, length + 1);
133    }
134
135    impl_sliced!();
136    impl_mut_validity!();
137    impl_into_array!();
138}
139
140// Accessors
141impl<O: Offset> ListArray<O> {
142    /// Returns the length of this array
143    #[inline]
144    pub fn len(&self) -> usize {
145        self.offsets.len_proxy()
146    }
147
148    /// Returns the element at index `i`
149    /// # Panic
150    /// Panics iff `i >= self.len()`
151    #[inline]
152    pub fn value(&self, i: usize) -> Box<dyn Array> {
153        assert!(i < self.len());
154        // SAFETY: invariant of this function
155        unsafe { self.value_unchecked(i) }
156    }
157
158    /// Returns the element at index `i` as &str
159    ///
160    /// # Safety
161    /// Assumes that the `i < self.len`.
162    #[inline]
163    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
164        // SAFETY: the invariant of the function
165        let (start, end) = self.offsets.start_end_unchecked(i);
166        let length = end - start;
167
168        // SAFETY: the invariant of the struct
169        self.values.sliced_unchecked(start, length)
170    }
171
172    /// The optional validity.
173    #[inline]
174    pub fn validity(&self) -> Option<&Bitmap> {
175        self.validity.as_ref()
176    }
177
178    /// The offsets [`Buffer`].
179    #[inline]
180    pub fn offsets(&self) -> &OffsetsBuffer<O> {
181        &self.offsets
182    }
183
184    /// The values.
185    #[inline]
186    pub fn values(&self) -> &Box<dyn Array> {
187        &self.values
188    }
189}
190
191impl<O: Offset> ListArray<O> {
192    /// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
193    pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
194        let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
195        if O::IS_LARGE {
196            ArrowDataType::LargeList(field)
197        } else {
198            ArrowDataType::List(field)
199        }
200    }
201
202    /// Returns a the inner [`Field`]
203    /// # Panics
204    /// Panics iff the logical type is not consistent with this struct.
205    pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
206        Self::try_get_child(dtype).unwrap()
207    }
208
209    /// Returns a the inner [`Field`]
210    /// # Errors
211    /// Panics iff the logical type is not consistent with this struct.
212    pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
213        if O::IS_LARGE {
214            match dtype.to_logical_type() {
215                ArrowDataType::LargeList(child) => Ok(child.as_ref()),
216                _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
217            }
218        } else {
219            match dtype.to_logical_type() {
220                ArrowDataType::List(child) => Ok(child.as_ref()),
221                _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
222            }
223        }
224    }
225
226    /// Returns a the inner [`ArrowDataType`]
227    /// # Panics
228    /// Panics iff the logical type is not consistent with this struct.
229    pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
230        Self::get_child_field(dtype).dtype()
231    }
232}
233
234impl<O: Offset> Array for ListArray<O> {
235    impl_common_array!();
236
237    fn validity(&self) -> Option<&Bitmap> {
238        self.validity.as_ref()
239    }
240
241    #[inline]
242    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
243        Box::new(self.clone().with_validity(validity))
244    }
245}
246
247impl<O: Offset> Splitable for ListArray<O> {
248    fn check_bound(&self, offset: usize) -> bool {
249        offset <= self.len()
250    }
251
252    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
253        let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
254        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
255
256        (
257            Self {
258                dtype: self.dtype.clone(),
259                offsets: lhs_offsets,
260                validity: lhs_validity,
261                values: self.values.clone(),
262            },
263            Self {
264                dtype: self.dtype.clone(),
265                offsets: rhs_offsets,
266                validity: rhs_validity,
267                values: self.values.clone(),
268            },
269        )
270    }
271}