polars_arrow/array/list/
mod.rs1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17#[cfg(feature = "proptest")]
18pub mod proptest;
19
20#[derive(Clone)]
22pub struct ListArray<O: Offset> {
23 dtype: ArrowDataType,
24 offsets: OffsetsBuffer<O>,
25 values: Box<dyn Array>,
26 validity: Option<Bitmap>,
27}
28
29impl<O: Offset> ListArray<O> {
30 pub fn try_new(
41 dtype: ArrowDataType,
42 offsets: OffsetsBuffer<O>,
43 values: Box<dyn Array>,
44 validity: Option<Bitmap>,
45 ) -> PolarsResult<Self> {
46 try_check_offsets_bounds(&offsets, values.len())?;
47
48 if validity
49 .as_ref()
50 .is_some_and(|validity| validity.len() != offsets.len_proxy())
51 {
52 polars_bail!(ComputeError: "validity mask length must match the number of values")
53 }
54
55 let child_dtype = Self::try_get_child(&dtype)?.dtype();
56 let values_dtype = values.dtype();
57 if child_dtype != values_dtype {
58 polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
59 }
60
61 Ok(Self {
62 dtype,
63 offsets,
64 values,
65 validity,
66 })
67 }
68
69 pub fn new(
80 dtype: ArrowDataType,
81 offsets: OffsetsBuffer<O>,
82 values: Box<dyn Array>,
83 validity: Option<Bitmap>,
84 ) -> Self {
85 Self::try_new(dtype, offsets, values, validity).unwrap()
86 }
87
88 pub fn new_empty(dtype: ArrowDataType) -> Self {
90 let values = new_empty_array(Self::get_child_type(&dtype).clone());
91 Self::new(dtype, OffsetsBuffer::default(), values, None)
92 }
93
94 #[inline]
96 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
97 let child = Self::get_child_type(&dtype).clone();
98 Self::new(
99 dtype,
100 Offsets::new_zeroed(length).into(),
101 new_empty_array(child),
102 Some(Bitmap::new_zeroed(length)),
103 )
104 }
105}
106
107impl<O: Offset> ListArray<O> {
108 pub fn slice(&mut self, offset: usize, length: usize) {
112 assert!(
113 offset + length <= self.len(),
114 "the offset of the new Buffer cannot exceed the existing length"
115 );
116 unsafe { self.slice_unchecked(offset, length) }
117 }
118
119 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
124 self.validity = self
125 .validity
126 .take()
127 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
128 .filter(|bitmap| bitmap.unset_bits() > 0);
129 self.offsets.slice_unchecked(offset, length + 1);
130 }
131
132 impl_sliced!();
133 impl_mut_validity!();
134 impl_into_array!();
135}
136
137impl<O: Offset> ListArray<O> {
139 #[inline]
141 pub fn len(&self) -> usize {
142 self.offsets.len_proxy()
143 }
144
145 #[inline]
149 pub fn value(&self, i: usize) -> Box<dyn Array> {
150 assert!(i < self.len());
151 unsafe { self.value_unchecked(i) }
153 }
154
155 #[inline]
160 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
161 let (start, end) = self.offsets.start_end_unchecked(i);
163 let length = end - start;
164
165 self.values.sliced_unchecked(start, length)
167 }
168
169 #[inline]
171 pub fn validity(&self) -> Option<&Bitmap> {
172 self.validity.as_ref()
173 }
174
175 #[inline]
177 pub fn offsets(&self) -> &OffsetsBuffer<O> {
178 &self.offsets
179 }
180
181 #[inline]
183 pub fn values(&self) -> &Box<dyn Array> {
184 &self.values
185 }
186}
187
188impl<O: Offset> ListArray<O> {
189 pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
191 let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
192 if O::IS_LARGE {
193 ArrowDataType::LargeList(field)
194 } else {
195 ArrowDataType::List(field)
196 }
197 }
198
199 pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
203 Self::try_get_child(dtype).unwrap()
204 }
205
206 pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
210 if O::IS_LARGE {
211 match dtype.to_logical_type() {
212 ArrowDataType::LargeList(child) => Ok(child.as_ref()),
213 _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
214 }
215 } else {
216 match dtype.to_logical_type() {
217 ArrowDataType::List(child) => Ok(child.as_ref()),
218 _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
219 }
220 }
221 }
222
223 pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
227 Self::get_child_field(dtype).dtype()
228 }
229}
230
231impl<O: Offset> Array for ListArray<O> {
232 impl_common_array!();
233
234 fn validity(&self) -> Option<&Bitmap> {
235 self.validity.as_ref()
236 }
237
238 #[inline]
239 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
240 Box::new(self.clone().with_validity(validity))
241 }
242}
243
244impl<O: Offset> Splitable for ListArray<O> {
245 fn check_bound(&self, offset: usize) -> bool {
246 offset <= self.len()
247 }
248
249 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
250 let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
251 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
252
253 (
254 Self {
255 dtype: self.dtype.clone(),
256 offsets: lhs_offsets,
257 validity: lhs_validity,
258 values: self.values.clone(),
259 },
260 Self {
261 dtype: self.dtype.clone(),
262 offsets: rhs_offsets,
263 validity: rhs_validity,
264 values: self.values.clone(),
265 },
266 )
267 }
268}