polars_arrow/legacy/array/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use crate::array::{
3    Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
4    StructArray, ViewType, new_null_array,
5};
6use crate::bitmap::BitmapBuilder;
7use crate::datatypes::ArrowDataType;
8use crate::legacy::utils::CustomIterTools;
9use crate::offset::Offsets;
10use crate::types::NativeType;
11
12pub mod default_arrays;
13#[cfg(feature = "dtype-array")]
14pub mod fixed_size_list;
15pub mod list;
16pub mod null;
17pub mod slice;
18pub mod utf8;
19
20pub use slice::*;
21
22use crate::legacy::prelude::LargeListArray;
23
/// Flattens an iterator of optional sub-iterators into a single collection while
/// recording list bookkeeping as a side effect.
///
/// For every outer item, one flag is pushed onto `$validity` and the running
/// total `$length_so_far` is pushed onto `$offsets`. The running total is advanced
/// by the *lower bound* of the sub-iterator's `size_hint`, not by the number of
/// items it actually yields, so callers must pass iterators with an exact hint.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|outer| {
                let Some(inner) = outer else {
                    // Null sub-list: contributes no values, so the offset repeats.
                    $validity.push(false);
                    $offsets.push($length_so_far);
                    return None;
                };

                let inner = inner.into_iter();
                $length_so_far += inner.size_hint().0 as i64;
                $validity.push(true);
                $offsets.push($length_so_far);
                Some(inner)
            })
            .flatten()
            .collect()
    }};
}
45
46pub trait ListFromIter {
47    /// Create a list-array from an iterator.
48    /// Used in group_by agg-list
49    ///
50    /// # Safety
51    /// Will produce incorrect arrays if size hint is incorrect.
52    unsafe fn from_iter_primitive_trusted_len<T, P, I>(
53        iter: I,
54        dtype: ArrowDataType,
55    ) -> ListArray<i64>
56    where
57        T: NativeType,
58        P: IntoIterator<Item = Option<T>>,
59        I: IntoIterator<Item = Option<P>>,
60    {
61        let iterator = iter.into_iter();
62        let (lower, _) = iterator.size_hint();
63
64        let mut validity = BitmapBuilder::with_capacity(lower);
65        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
66        let mut length_so_far = 0i64;
67        offsets.push(length_so_far);
68
69        let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
70
71        // SAFETY:
72        // offsets are monotonically increasing
73        ListArray::new(
74            ListArray::<i64>::default_datatype(dtype.clone()),
75            Offsets::new_unchecked(offsets).into(),
76            Box::new(values.to(dtype)),
77            validity.into_opt_validity(),
78        )
79    }
80
81    /// Create a list-array from an iterator.
82    /// Used in group_by agg-list
83    ///
84    /// # Safety
85    /// Will produce incorrect arrays if size hint is incorrect.
86    unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
87    where
88        I: IntoIterator<Item = Option<P>>,
89        P: IntoIterator<Item = Option<bool>>,
90    {
91        let iterator = iter.into_iter();
92        let (lower, _) = iterator.size_hint();
93
94        let mut validity = Vec::with_capacity(lower);
95        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
96        let mut length_so_far = 0i64;
97        offsets.push(length_so_far);
98
99        let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
100
101        // SAFETY:
102        // Offsets are monotonically increasing.
103        ListArray::new(
104            ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
105            Offsets::new_unchecked(offsets).into(),
106            Box::new(values),
107            Some(validity.into()),
108        )
109    }
110
111    /// # Safety
112    /// Will produce incorrect arrays if size hint is incorrect.
113    unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
114        iter: I,
115        n_elements: usize,
116    ) -> ListArray<i64>
117    where
118        I: IntoIterator<Item = Option<P>>,
119        P: IntoIterator<Item = Option<Ref>>,
120        Ref: AsRef<T>,
121    {
122        let iterator = iter.into_iter();
123        let (lower, _) = iterator.size_hint();
124
125        let mut validity = BitmapBuilder::with_capacity(lower);
126        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
127        let mut length_so_far = 0i64;
128        offsets.push(length_so_far);
129
130        let values: MutableBinaryViewArray<T> = iterator
131            .filter_map(|opt_iter| match opt_iter {
132                Some(x) => {
133                    let it = x.into_iter();
134                    length_so_far += it.size_hint().0 as i64;
135                    validity.push(true);
136                    offsets.push(length_so_far);
137                    Some(it)
138                },
139                None => {
140                    validity.push(false);
141                    offsets.push(length_so_far);
142                    None
143                },
144            })
145            .flatten()
146            .trust_my_length(n_elements)
147            .collect();
148
149        // SAFETY:
150        // offsets are monotonically increasing
151        ListArray::new(
152            ListArray::<i64>::default_datatype(T::DATA_TYPE),
153            Offsets::new_unchecked(offsets).into(),
154            values.freeze().boxed(),
155            validity.into_opt_validity(),
156        )
157    }
158
159    /// Create a list-array from an iterator.
160    /// Used in group_by agg-list
161    ///
162    /// # Safety
163    /// Will produce incorrect arrays if size hint is incorrect.
164    unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
165    where
166        I: IntoIterator<Item = Option<P>>,
167        P: IntoIterator<Item = Option<Ref>>,
168        Ref: AsRef<str>,
169    {
170        Self::from_iter_binview_trusted_len(iter, n_elements)
171    }
172
173    /// Create a list-array from an iterator.
174    /// Used in group_by agg-list
175    ///
176    /// # Safety
177    /// Will produce incorrect arrays if size hint is incorrect.
178    unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
179    where
180        I: IntoIterator<Item = Option<P>>,
181        P: IntoIterator<Item = Option<Ref>>,
182        Ref: AsRef<[u8]>,
183    {
184        Self::from_iter_binview_trusted_len(iter, n_elements)
185    }
186}
187impl ListFromIter for ListArray<i64> {}
188
189fn is_nested_null(dtype: &ArrowDataType) -> bool {
190    match dtype {
191        ArrowDataType::Null => true,
192        ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
193        ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
194        ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
195        _ => false,
196    }
197}
198
199/// Cast null arrays to inner type and ensure that all offsets remain correct
200pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
201    match dtype {
202        ArrowDataType::LargeList(field) => {
203            let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
204            let inner = array.values();
205            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
206            let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
207            LargeListArray::new(
208                dtype,
209                array.offsets().clone(),
210                new_values,
211                array.validity().cloned(),
212            )
213            .boxed()
214        },
215        ArrowDataType::FixedSizeList(field, width) => {
216            let width = *width;
217
218            let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
219            let inner = array.values();
220            let length = if width == array.size() {
221                array.len()
222            } else {
223                assert!(!array.values().is_empty() || width != 0);
224                if width == 0 {
225                    0
226                } else {
227                    array.values().len() / width
228                }
229            };
230            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
231            let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
232            FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
233        },
234        ArrowDataType::Struct(fields) => {
235            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
236            let inner = array.values();
237            let new_values = inner
238                .iter()
239                .zip(fields)
240                .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
241                .collect::<Vec<_>>();
242            StructArray::new(
243                dtype.clone(),
244                array.len(),
245                new_values,
246                array.validity().cloned(),
247            )
248            .boxed()
249        },
250        _ => new_null_array(dtype.clone(), array.len()),
251    }
252}