// polars_python/series/construction.rs

1use std::borrow::Cow;
2
3use arrow::array::Array;
4use arrow::bitmap::BitmapBuilder;
5use arrow::types::NativeType;
6use numpy::{Element, PyArray1, PyArrayMethods};
7use polars_core::prelude::*;
8use polars_core::utils::CustomIterTools;
9use pyo3::exceptions::{PyTypeError, PyValueError};
10use pyo3::prelude::*;
11
12use crate::conversion::any_value::py_object_to_any_value;
13use crate::conversion::{reinterpret_vec, Wrap};
14use crate::error::PyPolarsErr;
15use crate::interop::arrow::to_rust::array_to_rust;
16use crate::prelude::ObjectValue;
17use crate::PySeries;
18
19// Init with numpy arrays.
20macro_rules! init_method {
21    ($name:ident, $type:ty) => {
22        #[pymethods]
23        impl PySeries {
24            #[staticmethod]
25            fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
26                mmap_numpy_array(name, array)
27            }
28        }
29    };
30}
31
32init_method!(new_i8, i8);
33init_method!(new_i16, i16);
34init_method!(new_i32, i32);
35init_method!(new_i64, i64);
36init_method!(new_u8, u8);
37init_method!(new_u16, u16);
38init_method!(new_u32, u32);
39init_method!(new_u64, u64);
40
41fn mmap_numpy_array<T: Element + NativeType>(name: &str, array: &Bound<PyArray1<T>>) -> PySeries {
42    let vals = unsafe { array.as_slice().unwrap() };
43
44    let arr = unsafe { arrow::ffi::mmap::slice_and_owner(vals, array.clone().unbind()) };
45    Series::from_arrow(name.into(), arr.to_boxed())
46        .unwrap()
47        .into()
48}
49
50#[pymethods]
51impl PySeries {
52    #[staticmethod]
53    fn new_bool(py: Python, name: &str, array: &Bound<PyArray1<bool>>, _strict: bool) -> Self {
54        let array = array.readonly();
55        let vals = array.as_slice().unwrap();
56        py.allow_threads(|| Series::new(name.into(), vals).into())
57    }
58
59    #[staticmethod]
60    fn new_f32(py: Python, name: &str, array: &Bound<PyArray1<f32>>, nan_is_null: bool) -> Self {
61        if nan_is_null {
62            let array = array.readonly();
63            let vals = array.as_slice().unwrap();
64            let ca: Float32Chunked = py.allow_threads(|| {
65                vals.iter()
66                    .map(|&val| if f32::is_nan(val) { None } else { Some(val) })
67                    .collect_trusted()
68            });
69            ca.with_name(name.into()).into_series().into()
70        } else {
71            mmap_numpy_array(name, array)
72        }
73    }
74
75    #[staticmethod]
76    fn new_f64(py: Python, name: &str, array: &Bound<PyArray1<f64>>, nan_is_null: bool) -> Self {
77        if nan_is_null {
78            let array = array.readonly();
79            let vals = array.as_slice().unwrap();
80            let ca: Float64Chunked = py.allow_threads(|| {
81                vals.iter()
82                    .map(|&val| if f64::is_nan(val) { None } else { Some(val) })
83                    .collect_trusted()
84            });
85            ca.with_name(name.into()).into_series().into()
86        } else {
87            mmap_numpy_array(name, array)
88        }
89    }
90}
91
92#[pymethods]
93impl PySeries {
94    #[staticmethod]
95    fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
96        let len = values.len()?;
97        let mut builder = BooleanChunkedBuilder::new(name.into(), len);
98
99        for res in values.try_iter()? {
100            let value = res?;
101            if value.is_none() {
102                builder.append_null()
103            } else {
104                let v = value.extract::<bool>()?;
105                builder.append_value(v)
106            }
107        }
108
109        let ca = builder.finish();
110        let s = ca.into_series();
111        Ok(s.into())
112    }
113}
114
115fn new_primitive<'a, T>(name: &str, values: &'a Bound<PyAny>, _strict: bool) -> PyResult<PySeries>
116where
117    T: PolarsNumericType,
118    ChunkedArray<T>: IntoSeries,
119    T::Native: FromPyObject<'a>,
120{
121    let len = values.len()?;
122    let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
123
124    for res in values.try_iter()? {
125        let value = res?;
126        if value.is_none() {
127            builder.append_null()
128        } else {
129            let v = value.extract::<T::Native>()?;
130            builder.append_value(v)
131        }
132    }
133
134    let ca = builder.finish();
135    let s = ca.into_series();
136    Ok(s.into())
137}
138
139// Init with lists that can contain Nones
140macro_rules! init_method_opt {
141    ($name:ident, $type:ty, $native: ty) => {
142        #[pymethods]
143        impl PySeries {
144            #[staticmethod]
145            fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
146                new_primitive::<$type>(name, obj, strict)
147            }
148        }
149    };
150}
151
152init_method_opt!(new_opt_u8, UInt8Type, u8);
153init_method_opt!(new_opt_u16, UInt16Type, u16);
154init_method_opt!(new_opt_u32, UInt32Type, u32);
155init_method_opt!(new_opt_u64, UInt64Type, u64);
156init_method_opt!(new_opt_i8, Int8Type, i8);
157init_method_opt!(new_opt_i16, Int16Type, i16);
158init_method_opt!(new_opt_i32, Int32Type, i32);
159init_method_opt!(new_opt_i64, Int64Type, i64);
160init_method_opt!(new_opt_i128, Int128Type, i64);
161init_method_opt!(new_opt_f32, Float32Type, f32);
162init_method_opt!(new_opt_f64, Float64Type, f64);
163
164fn convert_to_avs<'a>(
165    values: &'a Bound<'a, PyAny>,
166    strict: bool,
167    allow_object: bool,
168) -> PyResult<Vec<AnyValue<'a>>> {
169    values
170        .try_iter()?
171        .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
172        .collect()
173}
174
175#[pymethods]
176impl PySeries {
177    #[staticmethod]
178    fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
179        let any_values_result = values
180            .try_iter()?
181            .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
182            .collect::<PyResult<Vec<AnyValue>>>();
183        let result = any_values_result.and_then(|avs| {
184            let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
185                PyTypeError::new_err(format!(
186                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
187                ))
188            })?;
189            Ok(s.into())
190        });
191
192        // Fall back to Object type for non-strict construction.
193        if !strict && result.is_err() {
194            return Python::with_gil(|py| {
195                let objects = values
196                    .try_iter()?
197                    .map(|v| v?.extract())
198                    .collect::<PyResult<Vec<ObjectValue>>>()?;
199                Ok(Self::new_object(py, name, objects, strict))
200            });
201        }
202
203        result
204    }
205
206    #[staticmethod]
207    fn new_from_any_values_and_dtype(
208        name: &str,
209        values: &Bound<PyAny>,
210        dtype: Wrap<DataType>,
211        strict: bool,
212    ) -> PyResult<Self> {
213        let avs = convert_to_avs(values, strict, false)?;
214        let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
215            .map_err(|e| {
216                PyTypeError::new_err(format!(
217                "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
218            ))
219            })?;
220        Ok(s.into())
221    }
222
223    #[staticmethod]
224    fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
225        let len = values.len()?;
226        let mut builder = StringChunkedBuilder::new(name.into(), len);
227
228        for res in values.try_iter()? {
229            let value = res?;
230            if value.is_none() {
231                builder.append_null()
232            } else {
233                let v = value.extract::<Cow<str>>()?;
234                builder.append_value(v)
235            }
236        }
237
238        let ca = builder.finish();
239        let s = ca.into_series();
240        Ok(s.into())
241    }
242
243    #[staticmethod]
244    fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
245        let len = values.len()?;
246        let mut builder = BinaryChunkedBuilder::new(name.into(), len);
247
248        for res in values.try_iter()? {
249            let value = res?;
250            if value.is_none() {
251                builder.append_null()
252            } else {
253                let v = value.extract::<&[u8]>()?;
254                builder.append_value(v)
255            }
256        }
257
258        let ca = builder.finish();
259        let s = ca.into_series();
260        Ok(s.into())
261    }
262
263    #[staticmethod]
264    fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
265        Self::new_from_any_values(name, values, strict)
266    }
267
268    #[staticmethod]
269    fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
270        let series = reinterpret_vec(values);
271        if let Some(s) = series.iter().flatten().next() {
272            if s.dtype().is_object() {
273                return Err(PyValueError::new_err(
274                    "list of objects isn't supported; try building a 'object' only series",
275                ));
276            }
277        }
278        Ok(Series::new(name.into(), series).into())
279    }
280
281    #[staticmethod]
282    #[pyo3(signature = (name, values, strict, dtype))]
283    fn new_array(
284        name: &str,
285        values: &Bound<PyAny>,
286        strict: bool,
287        dtype: Wrap<DataType>,
288    ) -> PyResult<Self> {
289        Self::new_from_any_values_and_dtype(name, values, dtype, strict)
290    }
291
292    #[staticmethod]
293    pub fn new_object(py: Python, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
294        #[cfg(feature = "object")]
295        {
296            let mut validity = BitmapBuilder::with_capacity(values.len());
297            values.iter().for_each(|v| {
298                let is_valid = !v.inner.is_none(py);
299                // SAFETY: we can ensure that validity has correct capacity.
300                unsafe { validity.push_unchecked(is_valid) };
301            });
302            // Object builder must be registered. This is done on import.
303            let ca = ObjectChunked::<ObjectValue>::new_from_vec_and_validity(
304                name.into(),
305                values,
306                validity.into_opt_validity(),
307            );
308            let s = ca.into_series();
309            s.into()
310        }
311        #[cfg(not(feature = "object"))]
312        panic!("activate 'object' feature")
313    }
314
315    #[staticmethod]
316    fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
317        let len = values.len()?;
318        Ok(Series::new_null(name.into(), len).into())
319    }
320
321    #[staticmethod]
322    fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
323        let arr = array_to_rust(array)?;
324
325        match arr.dtype() {
326            ArrowDataType::LargeList(_) => {
327                let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
328                let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
329
330                let mut out = ListChunked::with_chunk(name.into(), array.clone());
331                if fast_explode {
332                    out.set_fast_explode()
333                }
334                Ok(out.into_series().into())
335            },
336            _ => {
337                let series: Series =
338                    Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
339                Ok(series.into())
340            },
341        }
342    }
343}