polars_arrow/legacy/array/
mod.rs1#![allow(unsafe_op_in_unsafe_fn)]
2
3use crate::array::{
4 Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
5 StructArray, ViewType, new_null_array,
6};
7use crate::bitmap::BitmapBuilder;
8use crate::datatypes::ArrowDataType;
9use crate::legacy::utils::CustomIterTools;
10use crate::offset::Offsets;
11use crate::types::NativeType;
12
13pub mod default_arrays;
14#[cfg(feature = "dtype-array")]
15pub mod fixed_size_list;
16pub mod list;
17pub mod null;
18pub mod slice;
19pub mod utf8;
20
21pub use slice::*;
22
23use crate::legacy::prelude::LargeListArray;
24
/// Flattens an iterator of optional sub-iterables into a single collection while
/// recording per-entry validity and cumulative offsets.
///
/// * `$iterator` - iterator yielding `Option<impl IntoIterator>`.
/// * `$validity` - receives one `push(bool)` per outer item (`true` for `Some`).
/// * `$offsets` - receives one `push` of the running length per outer item.
/// * `$length_so_far` - running `i64` total, advanced by the *lower bound* of each
///   sub-iterator's `size_hint`, not by the number of items it actually yields.
///
/// The expression evaluates to whatever `collect()` is asked to build from the
/// flattened inner items.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|maybe_sub| {
                // Turn a present entry into its iterator and account for its length.
                let sub_iter = maybe_sub.map(|sub| {
                    let sub_iter = sub.into_iter();
                    $length_so_far += sub_iter.size_hint().0 as i64;
                    sub_iter
                });
                // A missing entry repeats the previous offset and is marked invalid.
                $validity.push(sub_iter.is_some());
                $offsets.push($length_so_far);
                sub_iter
            })
            .flatten()
            .collect()
    }};
}
46
47pub trait ListFromIter {
48 unsafe fn from_iter_primitive_trusted_len<T, P, I>(
54 iter: I,
55 dtype: ArrowDataType,
56 ) -> ListArray<i64>
57 where
58 T: NativeType,
59 P: IntoIterator<Item = Option<T>>,
60 I: IntoIterator<Item = Option<P>>,
61 {
62 let iterator = iter.into_iter();
63 let (lower, _) = iterator.size_hint();
64
65 let mut validity = BitmapBuilder::with_capacity(lower);
66 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
67 let mut length_so_far = 0i64;
68 offsets.push(length_so_far);
69
70 let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
71
72 ListArray::new(
75 ListArray::<i64>::default_datatype(dtype.clone()),
76 Offsets::new_unchecked(offsets).into(),
77 Box::new(values.to(dtype)),
78 validity.into_opt_validity(),
79 )
80 }
81
82 unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
88 where
89 I: IntoIterator<Item = Option<P>>,
90 P: IntoIterator<Item = Option<bool>>,
91 {
92 let iterator = iter.into_iter();
93 let (lower, _) = iterator.size_hint();
94
95 let mut validity = Vec::with_capacity(lower);
96 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
97 let mut length_so_far = 0i64;
98 offsets.push(length_so_far);
99
100 let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
101
102 ListArray::new(
105 ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
106 Offsets::new_unchecked(offsets).into(),
107 Box::new(values),
108 Some(validity.into()),
109 )
110 }
111
112 unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
115 iter: I,
116 n_elements: usize,
117 ) -> ListArray<i64>
118 where
119 I: IntoIterator<Item = Option<P>>,
120 P: IntoIterator<Item = Option<Ref>>,
121 Ref: AsRef<T>,
122 {
123 let iterator = iter.into_iter();
124 let (lower, _) = iterator.size_hint();
125
126 let mut validity = BitmapBuilder::with_capacity(lower);
127 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
128 let mut length_so_far = 0i64;
129 offsets.push(length_so_far);
130
131 let values: MutableBinaryViewArray<T> = iterator
132 .filter_map(|opt_iter| match opt_iter {
133 Some(x) => {
134 let it = x.into_iter();
135 length_so_far += it.size_hint().0 as i64;
136 validity.push(true);
137 offsets.push(length_so_far);
138 Some(it)
139 },
140 None => {
141 validity.push(false);
142 offsets.push(length_so_far);
143 None
144 },
145 })
146 .flatten()
147 .trust_my_length(n_elements)
148 .collect();
149
150 ListArray::new(
153 ListArray::<i64>::default_datatype(T::DATA_TYPE),
154 Offsets::new_unchecked(offsets).into(),
155 values.freeze().boxed(),
156 validity.into_opt_validity(),
157 )
158 }
159
160 unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
166 where
167 I: IntoIterator<Item = Option<P>>,
168 P: IntoIterator<Item = Option<Ref>>,
169 Ref: AsRef<str>,
170 {
171 Self::from_iter_binview_trusted_len(iter, n_elements)
172 }
173
174 unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
180 where
181 I: IntoIterator<Item = Option<P>>,
182 P: IntoIterator<Item = Option<Ref>>,
183 Ref: AsRef<[u8]>,
184 {
185 Self::from_iter_binview_trusted_len(iter, n_elements)
186 }
187}
188impl ListFromIter for ListArray<i64> {}
189
190fn is_nested_null(dtype: &ArrowDataType) -> bool {
191 match dtype {
192 ArrowDataType::Null => true,
193 ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
194 ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
195 ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
196 _ => false,
197 }
198}
199
200pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
202 match dtype {
203 ArrowDataType::LargeList(field) => {
204 let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
205 let inner = array.values();
206 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
207 let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
208 LargeListArray::new(
209 dtype,
210 array.offsets().clone(),
211 new_values,
212 array.validity().cloned(),
213 )
214 .boxed()
215 },
216 ArrowDataType::FixedSizeList(field, width) => {
217 let width = *width;
218
219 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
220 let inner = array.values();
221 let length = if width == array.size() {
222 array.len()
223 } else {
224 assert!(!array.values().is_empty() || width != 0);
225 if width == 0 {
226 0
227 } else {
228 array.values().len() / width
229 }
230 };
231 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
232 let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
233 FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
234 },
235 ArrowDataType::Struct(fields) => {
236 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
237 let inner = array.values();
238 let new_values = inner
239 .iter()
240 .zip(fields)
241 .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
242 .collect::<Vec<_>>();
243 StructArray::new(
244 dtype.clone(),
245 array.len(),
246 new_values,
247 array.validity().cloned(),
248 )
249 .boxed()
250 },
251 _ => new_null_array(dtype.clone(), array.len()),
252 }
253}