polars_arrow/array/list/
mod.rs1use super::specification::try_check_offsets_bounds;
2use super::{Array, Splitable, new_empty_array};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod builder;
8pub use builder::*;
9mod ffi;
10pub(super) mod fmt;
11mod iterator;
12pub use iterator::*;
13mod mutable;
14pub use mutable::*;
15use polars_error::{PolarsResult, polars_bail};
16use polars_utils::pl_str::PlSmallStr;
17
18#[derive(Clone)]
20pub struct ListArray<O: Offset> {
21 dtype: ArrowDataType,
22 offsets: OffsetsBuffer<O>,
23 values: Box<dyn Array>,
24 validity: Option<Bitmap>,
25}
26
27impl<O: Offset> ListArray<O> {
28 pub fn try_new(
39 dtype: ArrowDataType,
40 offsets: OffsetsBuffer<O>,
41 values: Box<dyn Array>,
42 validity: Option<Bitmap>,
43 ) -> PolarsResult<Self> {
44 try_check_offsets_bounds(&offsets, values.len())?;
45
46 if validity
47 .as_ref()
48 .is_some_and(|validity| validity.len() != offsets.len_proxy())
49 {
50 polars_bail!(ComputeError: "validity mask length must match the number of values")
51 }
52
53 let child_dtype = Self::try_get_child(&dtype)?.dtype();
54 let values_dtype = values.dtype();
55 if child_dtype != values_dtype {
56 polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
57 }
58
59 Ok(Self {
60 dtype,
61 offsets,
62 values,
63 validity,
64 })
65 }
66
67 pub fn new(
78 dtype: ArrowDataType,
79 offsets: OffsetsBuffer<O>,
80 values: Box<dyn Array>,
81 validity: Option<Bitmap>,
82 ) -> Self {
83 Self::try_new(dtype, offsets, values, validity).unwrap()
84 }
85
86 pub fn new_empty(dtype: ArrowDataType) -> Self {
88 let values = new_empty_array(Self::get_child_type(&dtype).clone());
89 Self::new(dtype, OffsetsBuffer::default(), values, None)
90 }
91
92 #[inline]
94 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
95 let child = Self::get_child_type(&dtype).clone();
96 Self::new(
97 dtype,
98 Offsets::new_zeroed(length).into(),
99 new_empty_array(child),
100 Some(Bitmap::new_zeroed(length)),
101 )
102 }
103}
104
105impl<O: Offset> ListArray<O> {
106 pub fn slice(&mut self, offset: usize, length: usize) {
110 assert!(
111 offset + length <= self.len(),
112 "the offset of the new Buffer cannot exceed the existing length"
113 );
114 unsafe { self.slice_unchecked(offset, length) }
115 }
116
117 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
122 self.validity = self
123 .validity
124 .take()
125 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
126 .filter(|bitmap| bitmap.unset_bits() > 0);
127 self.offsets.slice_unchecked(offset, length + 1);
128 }
129
130 impl_sliced!();
131 impl_mut_validity!();
132 impl_into_array!();
133}
134
135impl<O: Offset> ListArray<O> {
137 #[inline]
139 pub fn len(&self) -> usize {
140 self.offsets.len_proxy()
141 }
142
143 #[inline]
147 pub fn value(&self, i: usize) -> Box<dyn Array> {
148 assert!(i < self.len());
149 unsafe { self.value_unchecked(i) }
151 }
152
153 #[inline]
158 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
159 let (start, end) = self.offsets.start_end_unchecked(i);
161 let length = end - start;
162
163 self.values.sliced_unchecked(start, length)
165 }
166
167 #[inline]
169 pub fn validity(&self) -> Option<&Bitmap> {
170 self.validity.as_ref()
171 }
172
173 #[inline]
175 pub fn offsets(&self) -> &OffsetsBuffer<O> {
176 &self.offsets
177 }
178
179 #[inline]
181 pub fn values(&self) -> &Box<dyn Array> {
182 &self.values
183 }
184}
185
186impl<O: Offset> ListArray<O> {
187 pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
189 let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
190 if O::IS_LARGE {
191 ArrowDataType::LargeList(field)
192 } else {
193 ArrowDataType::List(field)
194 }
195 }
196
197 pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
201 Self::try_get_child(dtype).unwrap()
202 }
203
204 pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
208 if O::IS_LARGE {
209 match dtype.to_logical_type() {
210 ArrowDataType::LargeList(child) => Ok(child.as_ref()),
211 _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
212 }
213 } else {
214 match dtype.to_logical_type() {
215 ArrowDataType::List(child) => Ok(child.as_ref()),
216 _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
217 }
218 }
219 }
220
221 pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
225 Self::get_child_field(dtype).dtype()
226 }
227}
228
229impl<O: Offset> Array for ListArray<O> {
230 impl_common_array!();
231
232 fn validity(&self) -> Option<&Bitmap> {
233 self.validity.as_ref()
234 }
235
236 #[inline]
237 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
238 Box::new(self.clone().with_validity(validity))
239 }
240}
241
242impl<O: Offset> Splitable for ListArray<O> {
243 fn check_bound(&self, offset: usize) -> bool {
244 offset <= self.len()
245 }
246
247 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
248 let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
249 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
250
251 (
252 Self {
253 dtype: self.dtype.clone(),
254 offsets: lhs_offsets,
255 validity: lhs_validity,
256 values: self.values.clone(),
257 },
258 Self {
259 dtype: self.dtype.clone(),
260 offsets: rhs_offsets,
261 validity: rhs_validity,
262 values: self.values.clone(),
263 },
264 )
265 }
266}