polars_arrow/array/list/
mod.rs1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17#[cfg(feature = "proptest")]
18pub mod proptest;
19
20pub const LIST_VALUES_NAME: PlSmallStr = PlSmallStr::from_static("item");
22
23#[derive(Clone)]
25pub struct ListArray<O: Offset> {
26 dtype: ArrowDataType,
27 offsets: OffsetsBuffer<O>,
28 values: Box<dyn Array>,
29 validity: Option<Bitmap>,
30}
31
32impl<O: Offset> ListArray<O> {
33 pub fn try_new(
44 dtype: ArrowDataType,
45 offsets: OffsetsBuffer<O>,
46 values: Box<dyn Array>,
47 validity: Option<Bitmap>,
48 ) -> PolarsResult<Self> {
49 try_check_offsets_bounds(&offsets, values.len())?;
50
51 if validity
52 .as_ref()
53 .is_some_and(|validity| validity.len() != offsets.len_proxy())
54 {
55 polars_bail!(ComputeError: "validity mask length must match the number of values")
56 }
57
58 let child_dtype = Self::try_get_child(&dtype)?.dtype();
59 let values_dtype = values.dtype();
60 if child_dtype != values_dtype {
61 polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
62 }
63
64 Ok(Self {
65 dtype,
66 offsets,
67 values,
68 validity,
69 })
70 }
71
72 pub fn new(
83 dtype: ArrowDataType,
84 offsets: OffsetsBuffer<O>,
85 values: Box<dyn Array>,
86 validity: Option<Bitmap>,
87 ) -> Self {
88 Self::try_new(dtype, offsets, values, validity).unwrap()
89 }
90
91 pub fn new_empty(dtype: ArrowDataType) -> Self {
93 let values = new_empty_array(Self::get_child_type(&dtype).clone());
94 Self::new(dtype, OffsetsBuffer::default(), values, None)
95 }
96
97 #[inline]
99 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
100 let child = Self::get_child_type(&dtype).clone();
101 Self::new(
102 dtype,
103 Offsets::new_zeroed(length).into(),
104 new_empty_array(child),
105 Some(Bitmap::new_zeroed(length)),
106 )
107 }
108
109 pub fn into_inner(
110 self,
111 ) -> (
112 ArrowDataType,
113 Box<dyn Array>,
114 OffsetsBuffer<O>,
115 Option<Bitmap>,
116 ) {
117 (self.dtype, self.values, self.offsets, self.validity)
118 }
119}
120
121impl<O: Offset> ListArray<O> {
122 pub fn slice(&mut self, offset: usize, length: usize) {
126 assert!(
127 offset + length <= self.len(),
128 "the offset of the new Buffer cannot exceed the existing length"
129 );
130 unsafe { self.slice_unchecked(offset, length) }
131 }
132
133 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
138 self.validity = self
139 .validity
140 .take()
141 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
142 .filter(|bitmap| bitmap.unset_bits() > 0);
143 self.offsets.slice_unchecked(offset, length + 1);
144 }
145
146 impl_sliced!();
147 impl_mut_validity!();
148 impl_into_array!();
149}
150
151impl<O: Offset> ListArray<O> {
153 #[inline]
155 pub fn len(&self) -> usize {
156 self.offsets.len_proxy()
157 }
158
159 #[inline]
163 pub fn value(&self, i: usize) -> Box<dyn Array> {
164 assert!(i < self.len());
165 unsafe { self.value_unchecked(i) }
167 }
168
169 #[inline]
174 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
175 let (start, end) = self.offsets.start_end_unchecked(i);
177 let length = end - start;
178
179 self.values.sliced_unchecked(start, length)
181 }
182
183 #[inline]
185 pub fn validity(&self) -> Option<&Bitmap> {
186 self.validity.as_ref()
187 }
188
189 #[inline]
191 pub fn offsets(&self) -> &OffsetsBuffer<O> {
192 &self.offsets
193 }
194
195 #[inline]
197 pub fn values(&self) -> &Box<dyn Array> {
198 &self.values
199 }
200}
201
202impl<O: Offset> ListArray<O> {
203 pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
205 let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
206 if O::IS_LARGE {
207 ArrowDataType::LargeList(field)
208 } else {
209 ArrowDataType::List(field)
210 }
211 }
212
213 pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
217 Self::try_get_child(dtype).unwrap()
218 }
219
220 pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
224 if O::IS_LARGE {
225 match dtype.to_logical_type() {
226 ArrowDataType::LargeList(child) => Ok(child.as_ref()),
227 _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
228 }
229 } else {
230 match dtype.to_logical_type() {
231 ArrowDataType::List(child) => Ok(child.as_ref()),
232 _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
233 }
234 }
235 }
236
237 pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
241 Self::get_child_field(dtype).dtype()
242 }
243}
244
245impl<O: Offset> Array for ListArray<O> {
246 impl_common_array!();
247
248 fn validity(&self) -> Option<&Bitmap> {
249 self.validity.as_ref()
250 }
251
252 #[inline]
253 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
254 Box::new(self.clone().with_validity(validity))
255 }
256}
257
258impl<O: Offset> Splitable for ListArray<O> {
259 fn check_bound(&self, offset: usize) -> bool {
260 offset <= self.len()
261 }
262
263 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
264 let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
265 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
266
267 (
268 Self {
269 dtype: self.dtype.clone(),
270 offsets: lhs_offsets,
271 validity: lhs_validity,
272 values: self.values.clone(),
273 },
274 Self {
275 dtype: self.dtype.clone(),
276 offsets: rhs_offsets,
277 validity: rhs_validity,
278 values: self.values.clone(),
279 },
280 )
281 }
282}