polars_core/chunked_array/
from.rs

1use super::*;
2
3#[allow(clippy::all)]
4fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
5    // ensure we don't get List<null>
6    if let Some(arr) = chunks.get(0) {
7        DataType::from_arrow_dtype(arr.dtype())
8    } else {
9        dtype
10    }
11}
12
13impl<T, A> From<A> for ChunkedArray<T>
14where
15    T: PolarsDataType<Array = A>,
16    A: Array,
17{
18    fn from(arr: A) -> Self {
19        Self::with_chunk(PlSmallStr::EMPTY, arr)
20    }
21}
22
23impl<T> ChunkedArray<T>
24where
25    T: PolarsDataType,
26{
27    pub fn with_chunk<A>(name: PlSmallStr, arr: A) -> Self
28    where
29        A: Array,
30        T: PolarsDataType<Array = A>,
31    {
32        unsafe { Self::from_chunks(name, vec![Box::new(arr)]) }
33    }
34
35    pub fn with_chunk_like<A>(ca: &Self, arr: A) -> Self
36    where
37        A: Array,
38        T: PolarsDataType<Array = A>,
39    {
40        Self::from_chunk_iter_like(ca, std::iter::once(arr))
41    }
42
43    pub fn from_chunk_iter<I>(name: PlSmallStr, iter: I) -> Self
44    where
45        I: IntoIterator,
46        T: PolarsDataType<Array = <I as IntoIterator>::Item>,
47        <I as IntoIterator>::Item: Array,
48    {
49        let chunks = iter
50            .into_iter()
51            .map(|x| Box::new(x) as Box<dyn Array>)
52            .collect();
53        unsafe { Self::from_chunks(name, chunks) }
54    }
55
56    pub fn from_chunk_iter_like<I>(ca: &Self, iter: I) -> Self
57    where
58        I: IntoIterator,
59        T: PolarsDataType<Array = <I as IntoIterator>::Item>,
60        <I as IntoIterator>::Item: Array,
61    {
62        let chunks = iter
63            .into_iter()
64            .map(|x| Box::new(x) as Box<dyn Array>)
65            .collect();
66        unsafe {
67            Self::from_chunks_and_dtype_unchecked(ca.name().clone(), chunks, ca.dtype().clone())
68        }
69    }
70
71    pub fn try_from_chunk_iter<I, A, E>(name: PlSmallStr, iter: I) -> Result<Self, E>
72    where
73        I: IntoIterator<Item = Result<A, E>>,
74        T: PolarsDataType<Array = A>,
75        A: Array,
76    {
77        let chunks: Result<_, _> = iter
78            .into_iter()
79            .map(|x| Ok(Box::new(x?) as Box<dyn Array>))
80            .collect();
81        unsafe { Ok(Self::from_chunks(name, chunks?)) }
82    }
83
84    pub(crate) fn from_chunk_iter_and_field<I>(field: Arc<Field>, chunks: I) -> Self
85    where
86        I: IntoIterator,
87        T: PolarsDataType<Array = <I as IntoIterator>::Item>,
88        <I as IntoIterator>::Item: Array,
89    {
90        assert_eq!(
91            std::mem::discriminant(&T::get_static_dtype()),
92            std::mem::discriminant(&field.dtype)
93        );
94
95        let mut length = 0;
96        let mut null_count = 0;
97        let chunks = chunks
98            .into_iter()
99            .map(|x| {
100                length += x.len();
101                null_count += x.null_count();
102                Box::new(x) as Box<dyn Array>
103            })
104            .collect();
105
106        unsafe { ChunkedArray::new_with_dims(field, chunks, length, null_count) }
107    }
108
109    /// Create a new [`ChunkedArray`] from existing chunks.
110    ///
111    /// # Safety
112    /// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
113    pub unsafe fn from_chunks(name: PlSmallStr, mut chunks: Vec<ArrayRef>) -> Self {
114        let dtype = match T::get_static_dtype() {
115            dtype @ DataType::List(_) => from_chunks_list_dtype(&mut chunks, dtype),
116            #[cfg(feature = "dtype-array")]
117            dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),
118            #[cfg(feature = "dtype-struct")]
119            dtype @ DataType::Struct(_) => from_chunks_list_dtype(&mut chunks, dtype),
120            dt => dt,
121        };
122        Self::from_chunks_and_dtype(name, chunks, dtype)
123    }
124
125    /// # Safety
126    /// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
127    pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
128        ChunkedArray::new_with_compute_len(self.field.clone(), chunks)
129    }
130
131    /// Create a new [`ChunkedArray`] from existing chunks.
132    ///
133    /// # Safety
134    ///
135    /// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
136    pub unsafe fn from_chunks_and_dtype(
137        name: PlSmallStr,
138        chunks: Vec<ArrayRef>,
139        dtype: DataType,
140    ) -> Self {
141        // assertions in debug mode
142        // that check if the data types in the arrays are as expected
143        #[cfg(debug_assertions)]
144        {
145            if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() {
146                assert_eq!(chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()))
147            }
148        }
149
150        Self::from_chunks_and_dtype_unchecked(name, chunks, dtype)
151    }
152
153    /// Create a new [`ChunkedArray`] from existing chunks.
154    ///
155    /// # Safety
156    ///
157    /// The Arrow datatype of all chunks must match the [`PolarsDataType`] `T`.
158    pub(crate) unsafe fn from_chunks_and_dtype_unchecked(
159        name: PlSmallStr,
160        chunks: Vec<ArrayRef>,
161        dtype: DataType,
162    ) -> Self {
163        let field = Arc::new(Field::new(name, dtype));
164        ChunkedArray::new_with_compute_len(field, chunks)
165    }
166
167    pub fn full_null_like(ca: &Self, length: usize) -> Self {
168        let chunks = std::iter::once(T::Array::full_null(
169            length,
170            ca.dtype().to_arrow(CompatLevel::newest()),
171        ));
172        Self::from_chunk_iter_like(ca, chunks)
173    }
174}
175
176impl<T> ChunkedArray<T>
177where
178    T: PolarsNumericType,
179{
180    /// Create a new ChunkedArray by taking ownership of the Vec. This operation is zero copy.
181    pub fn from_vec(name: PlSmallStr, v: Vec<T::Native>) -> Self {
182        Self::with_chunk(name, to_primitive::<T>(v, None))
183    }
184
185    /// Create a new ChunkedArray from a Vec and a validity mask.
186    pub fn from_vec_validity(
187        name: PlSmallStr,
188        values: Vec<T::Native>,
189        buffer: Option<Bitmap>,
190    ) -> Self {
191        let arr = to_array::<T>(values, buffer);
192        ChunkedArray::new_with_compute_len(
193            Arc::new(Field::new(name, T::get_static_dtype())),
194            vec![arr],
195        )
196    }
197
198    /// Create a temporary [`ChunkedArray`] from a slice.
199    ///
200    /// # Safety
201    /// The lifetime will be bound to the lifetime of the slice.
202    /// This will not be checked by the borrowchecker.
203    pub unsafe fn mmap_slice(name: PlSmallStr, values: &[T::Native]) -> Self {
204        Self::with_chunk(name, arrow::ffi::mmap::slice(values))
205    }
206}
207
208impl BooleanChunked {
209    /// Create a temporary [`ChunkedArray`] from a slice.
210    ///
211    /// # Safety
212    /// The lifetime will be bound to the lifetime of the slice.
213    /// This will not be checked by the borrowchecker.
214    pub unsafe fn mmap_slice(name: PlSmallStr, values: &[u8], offset: usize, len: usize) -> Self {
215        let arr = arrow::ffi::mmap::bitmap(values, offset, len).unwrap();
216        Self::with_chunk(name, arr)
217    }
218
219    pub fn from_bitmap(name: PlSmallStr, bitmap: Bitmap) -> Self {
220        Self::with_chunk(
221            name,
222            BooleanArray::new(ArrowDataType::Boolean, bitmap, None),
223        )
224    }
225}
226
227impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>
228where
229    T: PolarsDataType,
230{
231    fn from(ca: &'a ChunkedArray<T>) -> Self {
232        let mut out = Vec::with_capacity(ca.len());
233        for arr in ca.downcast_iter() {
234            out.extend(arr.iter())
235        }
236        out
237    }
238}
239impl From<StringChunked> for Vec<Option<String>> {
240    fn from(ca: StringChunked) -> Self {
241        ca.iter().map(|opt| opt.map(|s| s.to_string())).collect()
242    }
243}
244
245impl From<BooleanChunked> for Vec<Option<bool>> {
246    fn from(ca: BooleanChunked) -> Self {
247        let mut out = Vec::with_capacity(ca.len());
248        for arr in ca.downcast_iter() {
249            out.extend(arr.iter())
250        }
251        out
252    }
253}