polars_arrow/array/list/
mod.rs1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17#[cfg(feature = "proptest")]
18pub mod proptest;
19
20pub const LIST_VALUES_NAME: PlSmallStr = PlSmallStr::from_static("item");
22
23#[derive(Clone)]
25pub struct ListArray<O: Offset> {
26 dtype: ArrowDataType,
27 offsets: OffsetsBuffer<O>,
28 values: Box<dyn Array>,
29 validity: Option<Bitmap>,
30}
31
32impl<O: Offset> ListArray<O> {
33 pub fn try_new(
44 dtype: ArrowDataType,
45 offsets: OffsetsBuffer<O>,
46 values: Box<dyn Array>,
47 validity: Option<Bitmap>,
48 ) -> PolarsResult<Self> {
49 try_check_offsets_bounds(&offsets, values.len())?;
50
51 if validity
52 .as_ref()
53 .is_some_and(|validity| validity.len() != offsets.len_proxy())
54 {
55 polars_bail!(ComputeError: "validity mask length must match the number of values")
56 }
57
58 let child_dtype = Self::try_get_child(&dtype)?.dtype();
59 let values_dtype = values.dtype();
60 if child_dtype != values_dtype {
61 polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
62 }
63
64 Ok(Self {
65 dtype,
66 offsets,
67 values,
68 validity,
69 })
70 }
71
72 pub fn new(
83 dtype: ArrowDataType,
84 offsets: OffsetsBuffer<O>,
85 values: Box<dyn Array>,
86 validity: Option<Bitmap>,
87 ) -> Self {
88 Self::try_new(dtype, offsets, values, validity).unwrap()
89 }
90
91 pub fn new_empty(dtype: ArrowDataType) -> Self {
93 let values = new_empty_array(Self::get_child_type(&dtype).clone());
94 Self::new(dtype, OffsetsBuffer::default(), values, None)
95 }
96
97 #[inline]
99 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
100 let child = Self::get_child_type(&dtype).clone();
101 Self::new(
102 dtype,
103 Offsets::new_zeroed(length).into(),
104 new_empty_array(child),
105 Some(Bitmap::new_zeroed(length)),
106 )
107 }
108}
109
110impl<O: Offset> ListArray<O> {
111 pub fn slice(&mut self, offset: usize, length: usize) {
115 assert!(
116 offset + length <= self.len(),
117 "the offset of the new Buffer cannot exceed the existing length"
118 );
119 unsafe { self.slice_unchecked(offset, length) }
120 }
121
122 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
127 self.validity = self
128 .validity
129 .take()
130 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
131 .filter(|bitmap| bitmap.unset_bits() > 0);
132 self.offsets.slice_unchecked(offset, length + 1);
133 }
134
135 impl_sliced!();
136 impl_mut_validity!();
137 impl_into_array!();
138}
139
140impl<O: Offset> ListArray<O> {
142 #[inline]
144 pub fn len(&self) -> usize {
145 self.offsets.len_proxy()
146 }
147
148 #[inline]
152 pub fn value(&self, i: usize) -> Box<dyn Array> {
153 assert!(i < self.len());
154 unsafe { self.value_unchecked(i) }
156 }
157
158 #[inline]
163 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
164 let (start, end) = self.offsets.start_end_unchecked(i);
166 let length = end - start;
167
168 self.values.sliced_unchecked(start, length)
170 }
171
172 #[inline]
174 pub fn validity(&self) -> Option<&Bitmap> {
175 self.validity.as_ref()
176 }
177
178 #[inline]
180 pub fn offsets(&self) -> &OffsetsBuffer<O> {
181 &self.offsets
182 }
183
184 #[inline]
186 pub fn values(&self) -> &Box<dyn Array> {
187 &self.values
188 }
189}
190
191impl<O: Offset> ListArray<O> {
192 pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
194 let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));
195 if O::IS_LARGE {
196 ArrowDataType::LargeList(field)
197 } else {
198 ArrowDataType::List(field)
199 }
200 }
201
202 pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
206 Self::try_get_child(dtype).unwrap()
207 }
208
209 pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
213 if O::IS_LARGE {
214 match dtype.to_logical_type() {
215 ArrowDataType::LargeList(child) => Ok(child.as_ref()),
216 _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
217 }
218 } else {
219 match dtype.to_logical_type() {
220 ArrowDataType::List(child) => Ok(child.as_ref()),
221 _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
222 }
223 }
224 }
225
226 pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
230 Self::get_child_field(dtype).dtype()
231 }
232}
233
234impl<O: Offset> Array for ListArray<O> {
235 impl_common_array!();
236
237 fn validity(&self) -> Option<&Bitmap> {
238 self.validity.as_ref()
239 }
240
241 #[inline]
242 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
243 Box::new(self.clone().with_validity(validity))
244 }
245}
246
247impl<O: Offset> Splitable for ListArray<O> {
248 fn check_bound(&self, offset: usize) -> bool {
249 offset <= self.len()
250 }
251
252 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
253 let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
254 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
255
256 (
257 Self {
258 dtype: self.dtype.clone(),
259 offsets: lhs_offsets,
260 validity: lhs_validity,
261 values: self.values.clone(),
262 },
263 Self {
264 dtype: self.dtype.clone(),
265 offsets: rhs_offsets,
266 validity: rhs_validity,
267 values: self.values.clone(),
268 },
269 )
270 }
271}