polars_python/series/
construction.rs

1use std::borrow::Cow;
2
3use arrow::array::Array;
4use arrow::bitmap::BitmapBuilder;
5use arrow::types::NativeType;
6use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};
7use polars_core::prelude::*;
8use polars_core::utils::CustomIterTools;
9use pyo3::exceptions::{PyTypeError, PyValueError};
10use pyo3::prelude::*;
11
12use crate::PySeries;
13use crate::conversion::Wrap;
14use crate::conversion::any_value::py_object_to_any_value;
15use crate::error::PyPolarsErr;
16use crate::interop::arrow::to_rust::array_to_rust;
17use crate::prelude::ObjectValue;
18use crate::utils::EnterPolarsExt;
19
20// Init with numpy arrays.
21macro_rules! init_method {
22    ($name:ident, $type:ty) => {
23        #[pymethods]
24        impl PySeries {
25            #[staticmethod]
26            fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
27                mmap_numpy_array(name, array)
28            }
29        }
30    };
31}
32
33init_method!(new_i8, i8);
34init_method!(new_i16, i16);
35init_method!(new_i32, i32);
36init_method!(new_i64, i64);
37init_method!(new_u8, u8);
38init_method!(new_u16, u16);
39init_method!(new_u32, u32);
40init_method!(new_u64, u64);
41
42fn mmap_numpy_array<T: Element + NativeType>(name: &str, array: &Bound<PyArray1<T>>) -> PySeries {
43    let vals = unsafe { array.as_slice().unwrap() };
44
45    let arr = unsafe { arrow::ffi::mmap::slice_and_owner(vals, array.clone().unbind()) };
46    Series::from_arrow(name.into(), arr.to_boxed())
47        .unwrap()
48        .into()
49}
50
51#[pymethods]
52impl PySeries {
53    #[staticmethod]
54    fn new_bool(
55        py: Python<'_>,
56        name: &str,
57        array: &Bound<PyArray1<bool>>,
58        _strict: bool,
59    ) -> PyResult<Self> {
60        let array = array.readonly();
61
62        // We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.
63        assert!(array.is_contiguous());
64        let data_ptr = array.data().cast::<u8>();
65        let data_len = array.len();
66        let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
67        py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))
68    }
69
70    #[staticmethod]
71    fn new_f32(
72        py: Python<'_>,
73        name: &str,
74        array: &Bound<PyArray1<f32>>,
75        nan_is_null: bool,
76    ) -> PyResult<Self> {
77        if nan_is_null {
78            let array = array.readonly();
79            let vals = array.as_slice().unwrap();
80            py.enter_polars_series(|| {
81                let ca: Float32Chunked = vals
82                    .iter()
83                    .map(|&val| if f32::is_nan(val) { None } else { Some(val) })
84                    .collect_trusted();
85                Ok(ca.with_name(name.into()))
86            })
87        } else {
88            Ok(mmap_numpy_array(name, array))
89        }
90    }
91
92    #[staticmethod]
93    fn new_f64(
94        py: Python<'_>,
95        name: &str,
96        array: &Bound<PyArray1<f64>>,
97        nan_is_null: bool,
98    ) -> PyResult<Self> {
99        if nan_is_null {
100            let array = array.readonly();
101            let vals = array.as_slice().unwrap();
102            py.enter_polars_series(|| {
103                let ca: Float64Chunked = vals
104                    .iter()
105                    .map(|&val| if f64::is_nan(val) { None } else { Some(val) })
106                    .collect_trusted();
107                Ok(ca.with_name(name.into()))
108            })
109        } else {
110            Ok(mmap_numpy_array(name, array))
111        }
112    }
113}
114
115#[pymethods]
116impl PySeries {
117    #[staticmethod]
118    fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
119        let len = values.len()?;
120        let mut builder = BooleanChunkedBuilder::new(name.into(), len);
121
122        for res in values.try_iter()? {
123            let value = res?;
124            if value.is_none() {
125                builder.append_null()
126            } else {
127                let v = value.extract::<bool>()?;
128                builder.append_value(v)
129            }
130        }
131
132        let ca = builder.finish();
133        let s = ca.into_series();
134        Ok(s.into())
135    }
136}
137
138fn new_primitive<'py, T>(
139    name: &str,
140    values: &Bound<'py, PyAny>,
141    _strict: bool,
142) -> PyResult<PySeries>
143where
144    T: PolarsNumericType,
145    T::Native: FromPyObject<'py>,
146{
147    let len = values.len()?;
148    let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
149
150    for res in values.try_iter()? {
151        let value = res?;
152        if value.is_none() {
153            builder.append_null()
154        } else {
155            let v = value.extract::<T::Native>()?;
156            builder.append_value(v)
157        }
158    }
159
160    let ca = builder.finish();
161    let s = ca.into_series();
162    Ok(s.into())
163}
164
165// Init with lists that can contain Nones
166macro_rules! init_method_opt {
167    ($name:ident, $type:ty, $native: ty) => {
168        #[pymethods]
169        impl PySeries {
170            #[staticmethod]
171            fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
172                new_primitive::<$type>(name, obj, strict)
173            }
174        }
175    };
176}
177
178init_method_opt!(new_opt_u8, UInt8Type, u8);
179init_method_opt!(new_opt_u16, UInt16Type, u16);
180init_method_opt!(new_opt_u32, UInt32Type, u32);
181init_method_opt!(new_opt_u64, UInt64Type, u64);
182init_method_opt!(new_opt_u128, UInt128Type, u128);
183init_method_opt!(new_opt_i8, Int8Type, i8);
184init_method_opt!(new_opt_i16, Int16Type, i16);
185init_method_opt!(new_opt_i32, Int32Type, i32);
186init_method_opt!(new_opt_i64, Int64Type, i64);
187init_method_opt!(new_opt_i128, Int128Type, i128);
188init_method_opt!(new_opt_f32, Float32Type, f32);
189init_method_opt!(new_opt_f64, Float64Type, f64);
190
191fn convert_to_avs(
192    values: &Bound<'_, PyAny>,
193    strict: bool,
194    allow_object: bool,
195) -> PyResult<Vec<AnyValue<'static>>> {
196    values
197        .try_iter()?
198        .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
199        .collect()
200}
201
202#[pymethods]
203impl PySeries {
204    #[staticmethod]
205    fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
206        let any_values_result = values
207            .try_iter()?
208            .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
209            .collect::<PyResult<Vec<AnyValue>>>();
210
211        let result = any_values_result.and_then(|avs| {
212            let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
213                PyTypeError::new_err(format!(
214                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
215                ))
216            })?;
217            Ok(s.into())
218        });
219
220        // Fall back to Object type for non-strict construction.
221        if !strict && result.is_err() {
222            return Python::attach(|py| {
223                let objects = values
224                    .try_iter()?
225                    .map(|v| v?.extract())
226                    .collect::<PyResult<Vec<ObjectValue>>>()?;
227                Ok(Self::new_object(py, name, objects, strict))
228            });
229        }
230
231        result
232    }
233
234    #[staticmethod]
235    fn new_from_any_values_and_dtype(
236        name: &str,
237        values: &Bound<PyAny>,
238        dtype: Wrap<DataType>,
239        strict: bool,
240    ) -> PyResult<Self> {
241        let avs = convert_to_avs(values, strict, false)?;
242        let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
243            .map_err(|e| {
244                PyTypeError::new_err(format!(
245                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
246                ))
247            })?;
248        Ok(s.into())
249    }
250
251    #[staticmethod]
252    fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
253        let len = values.len()?;
254        let mut builder = StringChunkedBuilder::new(name.into(), len);
255
256        for res in values.try_iter()? {
257            let value = res?;
258            if value.is_none() {
259                builder.append_null()
260            } else {
261                let v = value.extract::<Cow<str>>()?;
262                builder.append_value(v)
263            }
264        }
265
266        let ca = builder.finish();
267        let s = ca.into_series();
268        Ok(s.into())
269    }
270
271    #[staticmethod]
272    fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
273        let len = values.len()?;
274        let mut builder = BinaryChunkedBuilder::new(name.into(), len);
275
276        for res in values.try_iter()? {
277            let value = res?;
278            if value.is_none() {
279                builder.append_null()
280            } else {
281                let v = value.extract::<&[u8]>()?;
282                builder.append_value(v)
283            }
284        }
285
286        let ca = builder.finish();
287        let s = ca.into_series();
288        Ok(s.into())
289    }
290
291    #[staticmethod]
292    fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
293        Self::new_from_any_values(name, values, strict)
294    }
295
296    #[staticmethod]
297    fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
298        let series: Vec<_> = values
299            .into_iter()
300            .map(|ops| ops.map(|ps| ps.series.into_inner()))
301            .collect();
302        if let Some(s) = series.iter().flatten().next() {
303            if s.dtype().is_object() {
304                return Err(PyValueError::new_err(
305                    "list of objects isn't supported; try building a 'object' only series",
306                ));
307            }
308        }
309        Ok(Series::new(name.into(), series).into())
310    }
311
312    #[staticmethod]
313    #[pyo3(signature = (name, values, strict, dtype))]
314    fn new_array(
315        name: &str,
316        values: &Bound<PyAny>,
317        strict: bool,
318        dtype: Wrap<DataType>,
319    ) -> PyResult<Self> {
320        Self::new_from_any_values_and_dtype(name, values, dtype, strict)
321    }
322
323    #[staticmethod]
324    pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
325        #[cfg(feature = "object")]
326        {
327            let mut validity = BitmapBuilder::with_capacity(values.len());
328            values.iter().for_each(|v| {
329                let is_valid = !v.inner.is_none(py);
330                // SAFETY: we can ensure that validity has correct capacity.
331                unsafe { validity.push_unchecked(is_valid) };
332            });
333            // Object builder must be registered. This is done on import.
334            let ca = ObjectChunked::<ObjectValue>::new_from_vec_and_validity(
335                name.into(),
336                values,
337                validity.into_opt_validity(),
338            );
339            let s = ca.into_series();
340            s.into()
341        }
342        #[cfg(not(feature = "object"))]
343        panic!("activate 'object' feature")
344    }
345
346    #[staticmethod]
347    fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
348        let len = values.len()?;
349        Ok(Series::new_null(name.into(), len).into())
350    }
351
352    #[staticmethod]
353    fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
354        let arr = array_to_rust(array)?;
355
356        match arr.dtype() {
357            ArrowDataType::LargeList(_) => {
358                let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
359                let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
360
361                let mut out = ListChunked::with_chunk(name.into(), array.clone());
362                if fast_explode {
363                    out.set_fast_explode()
364                }
365                Ok(out.into_series().into())
366            },
367            _ => {
368                let series: Series =
369                    Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
370                Ok(series.into())
371            },
372        }
373    }
374}