Skip to main content

polars_arrow/legacy/array/
mod.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2
3use crate::array::{
4    Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,
5    StructArray, ViewType, new_null_array,
6};
7use crate::bitmap::BitmapBuilder;
8use crate::datatypes::ArrowDataType;
9use crate::legacy::utils::CustomIterTools;
10use crate::offset::Offsets;
11use crate::types::NativeType;
12
13pub mod default_arrays;
14#[cfg(feature = "dtype-array")]
15pub mod fixed_size_list;
16pub mod list;
17pub mod null;
18pub mod slice;
19pub mod utf8;
20
21pub use slice::*;
22
23use crate::legacy::prelude::LargeListArray;
24
// Flattens an iterator of optional sub-iterables into one collection of values
// while recording, per outer item, a validity bit and the running end offset.
//
// * `$iterator`      - iterator yielding `Option<impl IntoIterator>`.
// * `$validity`      - anything with `push(bool)`; gets `true` for `Some`, `false` for `None`.
// * `$offsets`       - anything with `push(i64)`; gets the running length after each outer item.
// * `$length_so_far` - mutable `i64` accumulator, advanced by the LOWER bound of each
//                      sub-iterator's `size_hint`, which therefore has to be exact.
//
// The bookkeeping happens lazily, as the resulting iterator is driven by `collect()`.
macro_rules! iter_to_values {
    ($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{
        $iterator
            .filter_map(|maybe_sublist| {
                // A present sub-list advances the running length; a null one leaves it as is.
                let sub_iter = maybe_sublist.map(|sublist| {
                    let sub_iter = sublist.into_iter();
                    $length_so_far += sub_iter.size_hint().0 as i64;
                    sub_iter
                });
                $validity.push(sub_iter.is_some());
                $offsets.push($length_so_far);
                sub_iter
            })
            .flatten()
            .collect()
    }};
}
46
47pub trait ListFromIter {
48    /// Create a list-array from an iterator.
49    /// Used in group_by agg-list
50    ///
51    /// # Safety
52    /// Will produce incorrect arrays if size hint is incorrect.
53    unsafe fn from_iter_primitive_trusted_len<T, P, I>(
54        iter: I,
55        dtype: ArrowDataType,
56    ) -> ListArray<i64>
57    where
58        T: NativeType,
59        P: IntoIterator<Item = Option<T>>,
60        I: IntoIterator<Item = Option<P>>,
61    {
62        let iterator = iter.into_iter();
63        let (lower, _) = iterator.size_hint();
64
65        let mut validity = BitmapBuilder::with_capacity(lower);
66        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
67        let mut length_so_far = 0i64;
68        offsets.push(length_so_far);
69
70        let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);
71
72        // SAFETY:
73        // offsets are monotonically increasing
74        ListArray::new(
75            ListArray::<i64>::default_datatype(dtype.clone()),
76            Offsets::new_unchecked(offsets).into(),
77            Box::new(values.to(dtype)),
78            validity.into_opt_validity(),
79        )
80    }
81
82    /// Create a list-array from an iterator.
83    /// Used in group_by agg-list
84    ///
85    /// # Safety
86    /// Will produce incorrect arrays if size hint is incorrect.
87    unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>
88    where
89        I: IntoIterator<Item = Option<P>>,
90        P: IntoIterator<Item = Option<bool>>,
91    {
92        let iterator = iter.into_iter();
93        let (lower, _) = iterator.size_hint();
94
95        let mut validity = Vec::with_capacity(lower);
96        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
97        let mut length_so_far = 0i64;
98        offsets.push(length_so_far);
99
100        let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);
101
102        // SAFETY:
103        // Offsets are monotonically increasing.
104        ListArray::new(
105            ListArray::<i64>::default_datatype(ArrowDataType::Boolean),
106            Offsets::new_unchecked(offsets).into(),
107            Box::new(values),
108            Some(validity.into()),
109        )
110    }
111
112    /// # Safety
113    /// Will produce incorrect arrays if size hint is incorrect.
114    unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(
115        iter: I,
116        n_elements: usize,
117    ) -> ListArray<i64>
118    where
119        I: IntoIterator<Item = Option<P>>,
120        P: IntoIterator<Item = Option<Ref>>,
121        Ref: AsRef<T>,
122    {
123        let iterator = iter.into_iter();
124        let (lower, _) = iterator.size_hint();
125
126        let mut validity = BitmapBuilder::with_capacity(lower);
127        let mut offsets = Vec::<i64>::with_capacity(lower + 1);
128        let mut length_so_far = 0i64;
129        offsets.push(length_so_far);
130
131        let values: MutableBinaryViewArray<T> = iterator
132            .filter_map(|opt_iter| match opt_iter {
133                Some(x) => {
134                    let it = x.into_iter();
135                    length_so_far += it.size_hint().0 as i64;
136                    validity.push(true);
137                    offsets.push(length_so_far);
138                    Some(it)
139                },
140                None => {
141                    validity.push(false);
142                    offsets.push(length_so_far);
143                    None
144                },
145            })
146            .flatten()
147            .trust_my_length(n_elements)
148            .collect();
149
150        // SAFETY:
151        // offsets are monotonically increasing
152        ListArray::new(
153            ListArray::<i64>::default_datatype(T::DATA_TYPE),
154            Offsets::new_unchecked(offsets).into(),
155            values.freeze().boxed(),
156            validity.into_opt_validity(),
157        )
158    }
159
160    /// Create a list-array from an iterator.
161    /// Used in group_by agg-list
162    ///
163    /// # Safety
164    /// Will produce incorrect arrays if size hint is incorrect.
165    unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
166    where
167        I: IntoIterator<Item = Option<P>>,
168        P: IntoIterator<Item = Option<Ref>>,
169        Ref: AsRef<str>,
170    {
171        Self::from_iter_binview_trusted_len(iter, n_elements)
172    }
173
174    /// Create a list-array from an iterator.
175    /// Used in group_by agg-list
176    ///
177    /// # Safety
178    /// Will produce incorrect arrays if size hint is incorrect.
179    unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>
180    where
181        I: IntoIterator<Item = Option<P>>,
182        P: IntoIterator<Item = Option<Ref>>,
183        Ref: AsRef<[u8]>,
184    {
185        Self::from_iter_binview_trusted_len(iter, n_elements)
186    }
187}
// Every method of `ListFromIter` has a provided body, so the impl itself is empty.
impl ListFromIter for ListArray<i64> {}
189
190fn is_nested_null(dtype: &ArrowDataType) -> bool {
191    match dtype {
192        ArrowDataType::Null => true,
193        ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),
194        ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),
195        ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),
196        _ => false,
197    }
198}
199
200/// Cast null arrays to inner type and ensure that all offsets remain correct
201pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {
202    match dtype {
203        ArrowDataType::LargeList(field) => {
204            let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();
205            let inner = array.values();
206            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
207            let dtype = LargeListArray::default_datatype(new_values.dtype().clone());
208            LargeListArray::new(
209                dtype,
210                array.offsets().clone(),
211                new_values,
212                array.validity().cloned(),
213            )
214            .boxed()
215        },
216        ArrowDataType::FixedSizeList(field, width) => {
217            let width = *width;
218
219            let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
220            let inner = array.values();
221            let length = if width == array.size() {
222                array.len()
223            } else {
224                assert!(!array.values().is_empty() || width != 0);
225                if width == 0 {
226                    0
227                } else {
228                    array.values().len() / width
229                }
230            };
231            let new_values = convert_inner_type(inner.as_ref(), field.dtype());
232            let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);
233            FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()
234        },
235        ArrowDataType::Struct(fields) => {
236            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
237            let inner = array.values();
238            let new_values = inner
239                .iter()
240                .zip(fields)
241                .map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))
242                .collect::<Vec<_>>();
243            StructArray::new(
244                dtype.clone(),
245                array.len(),
246                new_values,
247                array.validity().cloned(),
248            )
249            .boxed()
250        },
251        _ => new_null_array(dtype.clone(), array.len()),
252    }
253}