polars_python/series/
construction.rs

1use std::borrow::Cow;
2
3use arrow::array::Array;
4use arrow::bitmap::BitmapBuilder;
5use arrow::types::NativeType;
6use numpy::{Element, PyArray1, PyArrayMethods};
7use polars_core::prelude::*;
8use polars_core::utils::CustomIterTools;
9use pyo3::exceptions::{PyTypeError, PyValueError};
10use pyo3::prelude::*;
11
12use crate::PySeries;
13use crate::conversion::any_value::py_object_to_any_value;
14use crate::conversion::{Wrap, reinterpret_vec};
15use crate::error::PyPolarsErr;
16use crate::interop::arrow::to_rust::array_to_rust;
17use crate::prelude::ObjectValue;
18use crate::utils::EnterPolarsExt;
19
20// Init with numpy arrays.
21macro_rules! init_method {
22    ($name:ident, $type:ty) => {
23        #[pymethods]
24        impl PySeries {
25            #[staticmethod]
26            fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
27                mmap_numpy_array(name, array)
28            }
29        }
30    };
31}
32
33init_method!(new_i8, i8);
34init_method!(new_i16, i16);
35init_method!(new_i32, i32);
36init_method!(new_i64, i64);
37init_method!(new_u8, u8);
38init_method!(new_u16, u16);
39init_method!(new_u32, u32);
40init_method!(new_u64, u64);
41
42fn mmap_numpy_array<T: Element + NativeType>(name: &str, array: &Bound<PyArray1<T>>) -> PySeries {
43    let vals = unsafe { array.as_slice().unwrap() };
44
45    let arr = unsafe { arrow::ffi::mmap::slice_and_owner(vals, array.clone().unbind()) };
46    Series::from_arrow(name.into(), arr.to_boxed())
47        .unwrap()
48        .into()
49}
50
51#[pymethods]
52impl PySeries {
53    #[staticmethod]
54    fn new_bool(
55        py: Python<'_>,
56        name: &str,
57        array: &Bound<PyArray1<bool>>,
58        _strict: bool,
59    ) -> PyResult<Self> {
60        let array = array.readonly();
61        let vals = array.as_slice().unwrap();
62        py.enter_polars_series(|| Ok(Series::new(name.into(), vals)))
63    }
64
65    #[staticmethod]
66    fn new_f32(
67        py: Python<'_>,
68        name: &str,
69        array: &Bound<PyArray1<f32>>,
70        nan_is_null: bool,
71    ) -> PyResult<Self> {
72        if nan_is_null {
73            let array = array.readonly();
74            let vals = array.as_slice().unwrap();
75            py.enter_polars_series(|| {
76                let ca: Float32Chunked = vals
77                    .iter()
78                    .map(|&val| if f32::is_nan(val) { None } else { Some(val) })
79                    .collect_trusted();
80                Ok(ca.with_name(name.into()))
81            })
82        } else {
83            Ok(mmap_numpy_array(name, array))
84        }
85    }
86
87    #[staticmethod]
88    fn new_f64(
89        py: Python<'_>,
90        name: &str,
91        array: &Bound<PyArray1<f64>>,
92        nan_is_null: bool,
93    ) -> PyResult<Self> {
94        if nan_is_null {
95            let array = array.readonly();
96            let vals = array.as_slice().unwrap();
97            py.enter_polars_series(|| {
98                let ca: Float64Chunked = vals
99                    .iter()
100                    .map(|&val| if f64::is_nan(val) { None } else { Some(val) })
101                    .collect_trusted();
102                Ok(ca.with_name(name.into()))
103            })
104        } else {
105            Ok(mmap_numpy_array(name, array))
106        }
107    }
108}
109
110#[pymethods]
111impl PySeries {
112    #[staticmethod]
113    fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
114        let len = values.len()?;
115        let mut builder = BooleanChunkedBuilder::new(name.into(), len);
116
117        for res in values.try_iter()? {
118            let value = res?;
119            if value.is_none() {
120                builder.append_null()
121            } else {
122                let v = value.extract::<bool>()?;
123                builder.append_value(v)
124            }
125        }
126
127        let ca = builder.finish();
128        let s = ca.into_series();
129        Ok(s.into())
130    }
131}
132
133fn new_primitive<'py, T>(
134    name: &str,
135    values: &Bound<'py, PyAny>,
136    _strict: bool,
137) -> PyResult<PySeries>
138where
139    T: PolarsNumericType,
140    ChunkedArray<T>: IntoSeries,
141    T::Native: FromPyObject<'py>,
142{
143    let len = values.len()?;
144    let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
145
146    for res in values.try_iter()? {
147        let value = res?;
148        if value.is_none() {
149            builder.append_null()
150        } else {
151            let v = value.extract::<T::Native>()?;
152            builder.append_value(v)
153        }
154    }
155
156    let ca = builder.finish();
157    let s = ca.into_series();
158    Ok(s.into())
159}
160
161// Init with lists that can contain Nones
162macro_rules! init_method_opt {
163    ($name:ident, $type:ty, $native: ty) => {
164        #[pymethods]
165        impl PySeries {
166            #[staticmethod]
167            fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
168                new_primitive::<$type>(name, obj, strict)
169            }
170        }
171    };
172}
173
174init_method_opt!(new_opt_u8, UInt8Type, u8);
175init_method_opt!(new_opt_u16, UInt16Type, u16);
176init_method_opt!(new_opt_u32, UInt32Type, u32);
177init_method_opt!(new_opt_u64, UInt64Type, u64);
178init_method_opt!(new_opt_i8, Int8Type, i8);
179init_method_opt!(new_opt_i16, Int16Type, i16);
180init_method_opt!(new_opt_i32, Int32Type, i32);
181init_method_opt!(new_opt_i64, Int64Type, i64);
182init_method_opt!(new_opt_i128, Int128Type, i64);
183init_method_opt!(new_opt_f32, Float32Type, f32);
184init_method_opt!(new_opt_f64, Float64Type, f64);
185
186fn convert_to_avs(
187    values: &Bound<'_, PyAny>,
188    strict: bool,
189    allow_object: bool,
190) -> PyResult<Vec<AnyValue<'static>>> {
191    values
192        .try_iter()?
193        .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
194        .collect()
195}
196
197#[pymethods]
198impl PySeries {
199    #[staticmethod]
200    fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
201        let any_values_result = values
202            .try_iter()?
203            .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
204            .collect::<PyResult<Vec<AnyValue>>>();
205
206        let result = any_values_result.and_then(|avs| {
207            let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
208                PyTypeError::new_err(format!(
209                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
210                ))
211            })?;
212            Ok(s.into())
213        });
214
215        // Fall back to Object type for non-strict construction.
216        if !strict && result.is_err() {
217            return Python::with_gil(|py| {
218                let objects = values
219                    .try_iter()?
220                    .map(|v| v?.extract())
221                    .collect::<PyResult<Vec<ObjectValue>>>()?;
222                Ok(Self::new_object(py, name, objects, strict))
223            });
224        }
225
226        result
227    }
228
229    #[staticmethod]
230    fn new_from_any_values_and_dtype(
231        name: &str,
232        values: &Bound<PyAny>,
233        dtype: Wrap<DataType>,
234        strict: bool,
235    ) -> PyResult<Self> {
236        let avs = convert_to_avs(values, strict, false)?;
237        let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
238            .map_err(|e| {
239                PyTypeError::new_err(format!(
240                "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
241            ))
242            })?;
243        Ok(s.into())
244    }
245
246    #[staticmethod]
247    fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
248        let len = values.len()?;
249        let mut builder = StringChunkedBuilder::new(name.into(), len);
250
251        for res in values.try_iter()? {
252            let value = res?;
253            if value.is_none() {
254                builder.append_null()
255            } else {
256                let v = value.extract::<Cow<str>>()?;
257                builder.append_value(v)
258            }
259        }
260
261        let ca = builder.finish();
262        let s = ca.into_series();
263        Ok(s.into())
264    }
265
266    #[staticmethod]
267    fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
268        let len = values.len()?;
269        let mut builder = BinaryChunkedBuilder::new(name.into(), len);
270
271        for res in values.try_iter()? {
272            let value = res?;
273            if value.is_none() {
274                builder.append_null()
275            } else {
276                let v = value.extract::<&[u8]>()?;
277                builder.append_value(v)
278            }
279        }
280
281        let ca = builder.finish();
282        let s = ca.into_series();
283        Ok(s.into())
284    }
285
286    #[staticmethod]
287    fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
288        Self::new_from_any_values(name, values, strict)
289    }
290
291    #[staticmethod]
292    fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
293        let series = reinterpret_vec(values);
294        if let Some(s) = series.iter().flatten().next() {
295            if s.dtype().is_object() {
296                return Err(PyValueError::new_err(
297                    "list of objects isn't supported; try building a 'object' only series",
298                ));
299            }
300        }
301        Ok(Series::new(name.into(), series).into())
302    }
303
304    #[staticmethod]
305    #[pyo3(signature = (name, values, strict, dtype))]
306    fn new_array(
307        name: &str,
308        values: &Bound<PyAny>,
309        strict: bool,
310        dtype: Wrap<DataType>,
311    ) -> PyResult<Self> {
312        Self::new_from_any_values_and_dtype(name, values, dtype, strict)
313    }
314
315    #[staticmethod]
316    pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
317        #[cfg(feature = "object")]
318        {
319            let mut validity = BitmapBuilder::with_capacity(values.len());
320            values.iter().for_each(|v| {
321                let is_valid = !v.inner.is_none(py);
322                // SAFETY: we can ensure that validity has correct capacity.
323                unsafe { validity.push_unchecked(is_valid) };
324            });
325            // Object builder must be registered. This is done on import.
326            let ca = ObjectChunked::<ObjectValue>::new_from_vec_and_validity(
327                name.into(),
328                values,
329                validity.into_opt_validity(),
330            );
331            let s = ca.into_series();
332            s.into()
333        }
334        #[cfg(not(feature = "object"))]
335        panic!("activate 'object' feature")
336    }
337
338    #[staticmethod]
339    fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
340        let len = values.len()?;
341        Ok(Series::new_null(name.into(), len).into())
342    }
343
344    #[staticmethod]
345    fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
346        let arr = array_to_rust(array)?;
347
348        match arr.dtype() {
349            ArrowDataType::LargeList(_) => {
350                let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
351                let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
352
353                let mut out = ListChunked::with_chunk(name.into(), array.clone());
354                if fast_explode {
355                    out.set_fast_explode()
356                }
357                Ok(out.into_series().into())
358            },
359            _ => {
360                let series: Series =
361                    Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
362                Ok(series.into())
363            },
364        }
365    }
366}