polars_arrow/legacy/array/
mod.rs1#![allow(unsafe_op_in_unsafe_fn)]
2use crate::array::{
3 Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
4 StructArray, ViewType, new_null_array,
5};
6use crate::bitmap::BitmapBuilder;
7use crate::datatypes::ArrowDataType;
8use crate::legacy::utils::CustomIterTools;
9use crate::offset::Offsets;
10use crate::types::NativeType;
11
12pub mod default_arrays;
13#[cfg(feature = "dtype-array")]
14pub mod fixed_size_list;
15pub mod list;
16pub mod null;
17pub mod slice;
18pub mod utf8;
19
20pub use slice::*;
21
22use crate::legacy::prelude::LargeListArray;
23
/// Flattens an iterator of optional sub-iterators into one collected container of
/// values while recording list bookkeeping as a side effect.
///
/// For every outer item exactly one entry is pushed to `$validity` (`true` for
/// `Some`, `false` for `None`) and one entry to `$offsets` (the running
/// `$length_so_far`). The running length is advanced by the *lower bound* of the
/// sub-iterator's `size_hint`, not by the number of items actually yielded.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|maybe_sublist| {
                // Only a present row contributes to the running length.
                let inner = maybe_sublist.map(|sublist| {
                    let inner = sublist.into_iter();
                    $length_so_far += inner.size_hint().0 as i64;
                    inner
                });
                // Null rows repeat the previous offset, i.e. they are empty.
                $validity.push(inner.is_some());
                $offsets.push($length_so_far);
                inner
            })
            .flatten()
            .collect()
    }};
}
45
46pub trait ListFromIter {
47 unsafe fn from_iter_primitive_trusted_len<T, P, I>(
53 iter: I,
54 dtype: ArrowDataType,
55 ) -> ListArray<i64>
56 where
57 T: NativeType,
58 P: IntoIterator<Item = Option<T>>,
59 I: IntoIterator<Item = Option<P>>,
60 {
61 let iterator = iter.into_iter();
62 let (lower, _) = iterator.size_hint();
63
64 let mut validity = BitmapBuilder::with_capacity(lower);
65 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
66 let mut length_so_far = 0i64;
67 offsets.push(length_so_far);
68
69 let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
70
71 ListArray::new(
74 ListArray::<i64>::default_datatype(dtype.clone()),
75 Offsets::new_unchecked(offsets).into(),
76 Box::new(values.to(dtype)),
77 validity.into_opt_validity(),
78 )
79 }
80
81 unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
87 where
88 I: IntoIterator<Item = Option<P>>,
89 P: IntoIterator<Item = Option<bool>>,
90 {
91 let iterator = iter.into_iter();
92 let (lower, _) = iterator.size_hint();
93
94 let mut validity = Vec::with_capacity(lower);
95 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
96 let mut length_so_far = 0i64;
97 offsets.push(length_so_far);
98
99 let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
100
101 ListArray::new(
104 ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
105 Offsets::new_unchecked(offsets).into(),
106 Box::new(values),
107 Some(validity.into()),
108 )
109 }
110
111 unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
114 iter: I,
115 n_elements: usize,
116 ) -> ListArray<i64>
117 where
118 I: IntoIterator<Item = Option<P>>,
119 P: IntoIterator<Item = Option<Ref>>,
120 Ref: AsRef<T>,
121 {
122 let iterator = iter.into_iter();
123 let (lower, _) = iterator.size_hint();
124
125 let mut validity = BitmapBuilder::with_capacity(lower);
126 let mut offsets = Vec::<i64>::with_capacity(lower + 1);
127 let mut length_so_far = 0i64;
128 offsets.push(length_so_far);
129
130 let values: MutableBinaryViewArray<T> = iterator
131 .filter_map(|opt_iter| match opt_iter {
132 Some(x) => {
133 let it = x.into_iter();
134 length_so_far += it.size_hint().0 as i64;
135 validity.push(true);
136 offsets.push(length_so_far);
137 Some(it)
138 },
139 None => {
140 validity.push(false);
141 offsets.push(length_so_far);
142 None
143 },
144 })
145 .flatten()
146 .trust_my_length(n_elements)
147 .collect();
148
149 ListArray::new(
152 ListArray::<i64>::default_datatype(T::DATA_TYPE),
153 Offsets::new_unchecked(offsets).into(),
154 values.freeze().boxed(),
155 validity.into_opt_validity(),
156 )
157 }
158
159 unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
165 where
166 I: IntoIterator<Item = Option<P>>,
167 P: IntoIterator<Item = Option<Ref>>,
168 Ref: AsRef<str>,
169 {
170 Self::from_iter_binview_trusted_len(iter, n_elements)
171 }
172
173 unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
179 where
180 I: IntoIterator<Item = Option<P>>,
181 P: IntoIterator<Item = Option<Ref>>,
182 Ref: AsRef<[u8]>,
183 {
184 Self::from_iter_binview_trusted_len(iter, n_elements)
185 }
186}
187impl ListFromIter for ListArray<i64> {}
188
189fn is_nested_null(dtype: &ArrowDataType) -> bool {
190 match dtype {
191 ArrowDataType::Null => true,
192 ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
193 ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
194 ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
195 _ => false,
196 }
197}
198
199pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
201 match dtype {
202 ArrowDataType::LargeList(field) => {
203 let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
204 let inner = array.values();
205 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
206 let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
207 LargeListArray::new(
208 dtype,
209 array.offsets().clone(),
210 new_values,
211 array.validity().cloned(),
212 )
213 .boxed()
214 },
215 ArrowDataType::FixedSizeList(field, width) => {
216 let width = *width;
217
218 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
219 let inner = array.values();
220 let length = if width == array.size() {
221 array.len()
222 } else {
223 assert!(!array.values().is_empty() || width != 0);
224 if width == 0 {
225 0
226 } else {
227 array.values().len() / width
228 }
229 };
230 let new_values = convert_inner_type(inner.as_ref(), field.dtype());
231 let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
232 FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
233 },
234 ArrowDataType::Struct(fields) => {
235 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
236 let inner = array.values();
237 let new_values = inner
238 .iter()
239 .zip(fields)
240 .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
241 .collect::<Vec<_>>();
242 StructArray::new(
243 dtype.clone(),
244 array.len(),
245 new_values,
246 array.validity().cloned(),
247 )
248 .boxed()
249 },
250 _ => new_null_array(dtype.clone(), array.len()),
251 }
252}