polars_arrow/array/list/
mod.rs

1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17
/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory
/// representation.
///
/// Slot `i` of the array is the slice of `values` delimited by the `(start, end)` pair that
/// `offsets` reports for `i`; `validity`, when present, marks which slots are null.
#[derive(Clone)]
pub struct ListArray<O: Offset> {
    // Logical type of the array. `try_new` rejects anything whose logical type is not
    // `List` (for small offsets) or `LargeList` (when `O::IS_LARGE`).
    dtype: ArrowDataType,
    // Boundaries of every slot inside `values`. The array length is `offsets.len_proxy()`.
    offsets: OffsetsBuffer<O>,
    // Child array that the offsets index into. `try_new` requires its dtype to equal the
    // dtype of the inner field of `dtype`.
    values: Box<dyn Array>,
    // Optional per-slot validity. `try_new` requires its length to equal
    // `offsets.len_proxy()`.
    validity: Option<Bitmap>,
}
26
27impl<O: Offset> ListArray<O> {
28    /// Creates a new [`ListArray`].
29    ///
30    /// # Errors
31    /// This function returns an error iff:
32    /// * `offsets.last()` is greater than `values.len()`.
33    /// * the validity's length is not equal to `offsets.len_proxy()`.
34    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
35    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
36    /// # Implementation
37    /// This function is `O(1)`
38    pub fn try_new(
39        dtype: ArrowDataType,
40        offsets: OffsetsBuffer<O>,
41        values: Box<dyn Array>,
42        validity: Option<Bitmap>,
43    ) -> PolarsResult<Self> {
44        try_check_offsets_bounds(&offsets, values.len())?;
45
46        if validity
47            .as_ref()
48            .is_some_and(|validity| validity.len() != offsets.len_proxy())
49        {
50            polars_bail!(ComputeError: "validity mask length must match the number of values")
51        }
52
53        let child_dtype = Self::try_get_child(&dtype)?.dtype();
54        let values_dtype = values.dtype();
55        if child_dtype != values_dtype {
56            polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
57        }
58
59        Ok(Self {
60            dtype,
61            offsets,
62            values,
63            validity,
64        })
65    }
66
67    /// Creates a new [`ListArray`].
68    ///
69    /// # Panics
70    /// This function panics iff:
71    /// * `offsets.last()` is greater than `values.len()`.
72    /// * the validity's length is not equal to `offsets.len_proxy()`.
73    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
74    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
75    /// # Implementation
76    /// This function is `O(1)`
77    pub fn new(
78        dtype: ArrowDataType,
79        offsets: OffsetsBuffer<O>,
80        values: Box<dyn Array>,
81        validity: Option<Bitmap>,
82    ) -> Self {
83        Self::try_new(dtype, offsets, values, validity).unwrap()
84    }
85
86    /// Returns a new empty [`ListArray`].
87    pub fn new_empty(dtype: ArrowDataType) -> Self {
88        let values = new_empty_array(Self::get_child_type(&dtype).clone());
89        Self::new(dtype, OffsetsBuffer::default(), values, None)
90    }
91
92    /// Returns a new null [`ListArray`].
93    #[inline]
94    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
95        let child = Self::get_child_type(&dtype).clone();
96        Self::new(
97            dtype,
98            Offsets::new_zeroed(length).into(),
99            new_empty_array(child),
100            Some(Bitmap::new_zeroed(length)),
101        )
102    }
103}
104
105impl<O: Offset> ListArray<O> {
106    /// Slices this [`ListArray`].
107    /// # Panics
108    /// panics iff `offset + length > self.len()`
109    pub fn slice(&mut self, offset: usize, length: usize) {
110        assert!(
111            offset + length <= self.len(),
112            "the offset of the new Buffer cannot exceed the existing length"
113        );
114        unsafe { self.slice_unchecked(offset, length) }
115    }
116
117    /// Slices this [`ListArray`].
118    ///
119    /// # Safety
120    /// The caller must ensure that `offset + length < self.len()`.
121    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
122        self.validity = self
123            .validity
124            .take()
125            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
126            .filter(|bitmap| bitmap.unset_bits() > 0);
127        self.offsets.slice_unchecked(offset, length + 1);
128    }
129
130    impl_sliced!();
131    impl_mut_validity!();
132    impl_into_array!();
133}
134
135// Accessors
136impl<O: Offset> ListArray<O> {
137    /// Returns the length of this array
138    #[inline]
139    pub fn len(&self) -> usize {
140        self.offsets.len_proxy()
141    }
142
143    /// Returns the element at index `i`
144    /// # Panic
145    /// Panics iff `i >= self.len()`
146    #[inline]
147    pub fn value(&self, i: usize) -> Box<dyn Array> {
148        assert!(i < self.len());
149        // SAFETY: invariant of this function
150        unsafe { self.value_unchecked(i) }
151    }
152
153    /// Returns the element at index `i` as &str
154    ///
155    /// # Safety
156    /// Assumes that the `i < self.len`.
157    #[inline]
158    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
159        // SAFETY: the invariant of the function
160        let (start, end) = self.offsets.start_end_unchecked(i);
161        let length = end - start;
162
163        // SAFETY: the invariant of the struct
164        self.values.sliced_unchecked(start, length)
165    }
166
167    /// The optional validity.
168    #[inline]
169    pub fn validity(&self) -> Option<&Bitmap> {
170        self.validity.as_ref()
171    }
172
173    /// The offsets [`Buffer`].
174    #[inline]
175    pub fn offsets(&self) -> &OffsetsBuffer<O> {
176        &self.offsets
177    }
178
179    /// The values.
180    #[inline]
181    pub fn values(&self) -> &Box<dyn Array> {
182        &self.values
183    }
184}
185
186impl<O: Offset> ListArray<O> {
187    /// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
188    pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
189        let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
190        if O::IS_LARGE {
191            ArrowDataType::LargeList(field)
192        } else {
193            ArrowDataType::List(field)
194        }
195    }
196
197    /// Returns a the inner [`Field`]
198    /// # Panics
199    /// Panics iff the logical type is not consistent with this struct.
200    pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
201        Self::try_get_child(dtype).unwrap()
202    }
203
204    /// Returns a the inner [`Field`]
205    /// # Errors
206    /// Panics iff the logical type is not consistent with this struct.
207    pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
208        if O::IS_LARGE {
209            match dtype.to_logical_type() {
210                ArrowDataType::LargeList(child) => Ok(child.as_ref()),
211                _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
212            }
213        } else {
214            match dtype.to_logical_type() {
215                ArrowDataType::List(child) => Ok(child.as_ref()),
216                _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
217            }
218        }
219    }
220
221    /// Returns a the inner [`ArrowDataType`]
222    /// # Panics
223    /// Panics iff the logical type is not consistent with this struct.
224    pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
225        Self::get_child_field(dtype).dtype()
226    }
227}
228
// [`Array`] trait implementation; the methods not written out here are expected to come
// from `impl_common_array!` (macro body not visible in this file).
impl<O: Offset> Array for ListArray<O> {
    impl_common_array!();

    // Borrows the stored validity, if any.
    fn validity(&self) -> Option<&Bitmap> {
        self.validity.as_ref()
    }

    // Clones this array, applies `validity` to the clone and boxes the result.
    // NOTE(review): the inner `with_validity` call is presumably the by-value inherent
    // method generated by `impl_mut_validity!`, not this trait method — confirm, since a
    // change in method resolution here would recurse.
    #[inline]
    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
        Box::new(self.clone().with_validity(validity))
    }
}
241
242impl<O: Offset> Splitable for ListArray<O> {
243    fn check_bound(&self, offset: usize) -> bool {
244        offset <= self.len()
245    }
246
247    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
248        let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
249        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
250
251        (
252            Self {
253                dtype: self.dtype.clone(),
254                offsets: lhs_offsets,
255                validity: lhs_validity,
256                values: self.values.clone(),
257            },
258            Self {
259                dtype: self.dtype.clone(),
260                offsets: rhs_offsets,
261                validity: rhs_validity,
262                values: self.values.clone(),
263            },
264        )
265    }
266}