polars_arrow/array/list/
mod.rs

1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17#[cfg(feature = "proptest")]
18pub mod proptest;
19
/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>`, stored using Arrow's
/// in-memory list layout.
///
/// The offset type `O` selects the list variant: `try_get_child` expects
/// [`ArrowDataType::LargeList`] when `O::IS_LARGE` is set and [`ArrowDataType::List`] otherwise.
#[derive(Clone)]
pub struct ListArray<O: Offset> {
    /// Data type of the array; its child field's data type is validated against `values` in
    /// `try_new`.
    dtype: ArrowDataType,
    /// Start/end positions of every list inside `values`; `len()` is `offsets.len_proxy()`.
    offsets: OffsetsBuffer<O>,
    /// Child array holding the elements of all lists.
    values: Box<dyn Array>,
    /// Optional validity bitmap; when present its length equals `offsets.len_proxy()`
    /// (checked in `try_new`).
    validity: Option<Bitmap>,
}
28
29impl<O: Offset> ListArray<O> {
30    /// Creates a new [`ListArray`].
31    ///
32    /// # Errors
33    /// This function returns an error iff:
34    /// * `offsets.last()` is greater than `values.len()`.
35    /// * the validity's length is not equal to `offsets.len_proxy()`.
36    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
37    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
38    /// # Implementation
39    /// This function is `O(1)`
40    pub fn try_new(
41        dtype: ArrowDataType,
42        offsets: OffsetsBuffer<O>,
43        values: Box<dyn Array>,
44        validity: Option<Bitmap>,
45    ) -> PolarsResult<Self> {
46        try_check_offsets_bounds(&offsets, values.len())?;
47
48        if validity
49            .as_ref()
50            .is_some_and(|validity| validity.len() != offsets.len_proxy())
51        {
52            polars_bail!(ComputeError: "validity mask length must match the number of values")
53        }
54
55        let child_dtype = Self::try_get_child(&dtype)?.dtype();
56        let values_dtype = values.dtype();
57        if child_dtype != values_dtype {
58            polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
59        }
60
61        Ok(Self {
62            dtype,
63            offsets,
64            values,
65            validity,
66        })
67    }
68
69    /// Creates a new [`ListArray`].
70    ///
71    /// # Panics
72    /// This function panics iff:
73    /// * `offsets.last()` is greater than `values.len()`.
74    /// * the validity's length is not equal to `offsets.len_proxy()`.
75    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
76    /// * The `dtype`'s inner field's data type is not equal to `values.dtype`.
77    /// # Implementation
78    /// This function is `O(1)`
79    pub fn new(
80        dtype: ArrowDataType,
81        offsets: OffsetsBuffer<O>,
82        values: Box<dyn Array>,
83        validity: Option<Bitmap>,
84    ) -> Self {
85        Self::try_new(dtype, offsets, values, validity).unwrap()
86    }
87
88    /// Returns a new empty [`ListArray`].
89    pub fn new_empty(dtype: ArrowDataType) -> Self {
90        let values = new_empty_array(Self::get_child_type(&dtype).clone());
91        Self::new(dtype, OffsetsBuffer::default(), values, None)
92    }
93
94    /// Returns a new null [`ListArray`].
95    #[inline]
96    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
97        let child = Self::get_child_type(&dtype).clone();
98        Self::new(
99            dtype,
100            Offsets::new_zeroed(length).into(),
101            new_empty_array(child),
102            Some(Bitmap::new_zeroed(length)),
103        )
104    }
105}
106
107impl<O: Offset> ListArray<O> {
108    /// Slices this [`ListArray`].
109    /// # Panics
110    /// panics iff `offset + length > self.len()`
111    pub fn slice(&mut self, offset: usize, length: usize) {
112        assert!(
113            offset + length <= self.len(),
114            "the offset of the new Buffer cannot exceed the existing length"
115        );
116        unsafe { self.slice_unchecked(offset, length) }
117    }
118
119    /// Slices this [`ListArray`].
120    ///
121    /// # Safety
122    /// The caller must ensure that `offset + length < self.len()`.
123    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
124        self.validity = self
125            .validity
126            .take()
127            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
128            .filter(|bitmap| bitmap.unset_bits() > 0);
129        self.offsets.slice_unchecked(offset, length + 1);
130    }
131
132    impl_sliced!();
133    impl_mut_validity!();
134    impl_into_array!();
135}
136
137// Accessors
138impl<O: Offset> ListArray<O> {
139    /// Returns the length of this array
140    #[inline]
141    pub fn len(&self) -> usize {
142        self.offsets.len_proxy()
143    }
144
145    /// Returns the element at index `i`
146    /// # Panic
147    /// Panics iff `i >= self.len()`
148    #[inline]
149    pub fn value(&self, i: usize) -> Box<dyn Array> {
150        assert!(i < self.len());
151        // SAFETY: invariant of this function
152        unsafe { self.value_unchecked(i) }
153    }
154
155    /// Returns the element at index `i` as &str
156    ///
157    /// # Safety
158    /// Assumes that the `i < self.len`.
159    #[inline]
160    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
161        // SAFETY: the invariant of the function
162        let (start, end) = self.offsets.start_end_unchecked(i);
163        let length = end - start;
164
165        // SAFETY: the invariant of the struct
166        self.values.sliced_unchecked(start, length)
167    }
168
169    /// The optional validity.
170    #[inline]
171    pub fn validity(&self) -> Option<&Bitmap> {
172        self.validity.as_ref()
173    }
174
175    /// The offsets [`Buffer`].
176    #[inline]
177    pub fn offsets(&self) -> &OffsetsBuffer<O> {
178        &self.offsets
179    }
180
181    /// The values.
182    #[inline]
183    pub fn values(&self) -> &Box<dyn Array> {
184        &self.values
185    }
186}
187
188impl<O: Offset> ListArray<O> {
189    /// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
190    pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
191        let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
192        if O::IS_LARGE {
193            ArrowDataType::LargeList(field)
194        } else {
195            ArrowDataType::List(field)
196        }
197    }
198
199    /// Returns a the inner [`Field`]
200    /// # Panics
201    /// Panics iff the logical type is not consistent with this struct.
202    pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
203        Self::try_get_child(dtype).unwrap()
204    }
205
206    /// Returns a the inner [`Field`]
207    /// # Errors
208    /// Panics iff the logical type is not consistent with this struct.
209    pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
210        if O::IS_LARGE {
211            match dtype.to_logical_type() {
212                ArrowDataType::LargeList(child) => Ok(child.as_ref()),
213                _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
214            }
215        } else {
216            match dtype.to_logical_type() {
217                ArrowDataType::List(child) => Ok(child.as_ref()),
218                _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
219            }
220        }
221    }
222
223    /// Returns a the inner [`ArrowDataType`]
224    /// # Panics
225    /// Panics iff the logical type is not consistent with this struct.
226    pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
227        Self::get_child_field(dtype).dtype()
228    }
229}
230
231impl<O: Offset> Array for ListArray<O> {
232    impl_common_array!();
233
234    fn validity(&self) -> Option<&Bitmap> {
235        self.validity.as_ref()
236    }
237
238    #[inline]
239    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
240        Box::new(self.clone().with_validity(validity))
241    }
242}
243
244impl<O: Offset> Splitable for ListArray<O> {
245    fn check_bound(&self, offset: usize) -> bool {
246        offset <= self.len()
247    }
248
249    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
250        let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
251        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
252
253        (
254            Self {
255                dtype: self.dtype.clone(),
256                offsets: lhs_offsets,
257                validity: lhs_validity,
258                values: self.values.clone(),
259            },
260            Self {
261                dtype: self.dtype.clone(),
262                offsets: rhs_offsets,
263                validity: rhs_validity,
264                values: self.values.clone(),
265            },
266        )
267    }
268}