Skip to main content

polars_python/series/
construction.rs

1use std::borrow::Cow;
2
3use arrow::array::{Array, PrimitiveArray};
4use arrow::bitmap::BitmapBuilder;
5use arrow::types::NativeType;
6use num_traits::AsPrimitive;
7use numpy::{Element, PyArray1, PyArrayMethods, PyUntypedArrayMethods};
8use polars::prelude::*;
9use polars_buffer::{Buffer, SharedStorage};
10use pyo3::exceptions::{PyTypeError, PyValueError};
11use pyo3::prelude::*;
12
13use crate::PySeries;
14use crate::conversion::Wrap;
15use crate::conversion::any_value::py_object_to_any_value;
16use crate::error::PyPolarsErr;
17use crate::interop::arrow::to_rust::array_to_rust;
18use crate::prelude::ObjectValue;
19use crate::utils::EnterPolarsExt;
20
21// Init with numpy arrays.
22macro_rules! init_method {
23    ($name:ident, $type:ty) => {
24        #[pymethods]
25        impl PySeries {
26            #[staticmethod]
27            fn $name(name: &str, array: &Bound<PyArray1<$type>>, _strict: bool) -> Self {
28                let arr = numpy_array_to_arrow(array);
29                Series::from_arrow(name.into(), arr.to_boxed())
30                    .unwrap()
31                    .into()
32            }
33        }
34    };
35}
36
37init_method!(new_i8, i8);
38init_method!(new_i16, i16);
39init_method!(new_i32, i32);
40init_method!(new_i64, i64);
41init_method!(new_u8, u8);
42init_method!(new_u16, u16);
43init_method!(new_u32, u32);
44init_method!(new_u64, u64);
45
46fn numpy_array_to_arrow<T: Element + NativeType>(array: &Bound<PyArray1<T>>) -> PrimitiveArray<T> {
47    let owner = array.clone().unbind();
48    let ro = array.readonly();
49    let vals = ro.as_slice().unwrap();
50    unsafe {
51        let storage = SharedStorage::from_slice_with_owner(vals, owner);
52        let buffer = Buffer::from_storage(storage);
53        PrimitiveArray::new_unchecked(T::PRIMITIVE.into(), buffer, None)
54    }
55}
56
/// Builds an object `Series` called `name` from a vector of Python object values.
///
/// One validity bit is recorded per object: the bit is unset when the wrapped Python object
/// is `None` and set otherwise. The objects and the resulting optional validity are then
/// handed to `ObjectChunked::new_from_vec_and_validity`.
#[cfg(feature = "object")]
pub fn series_from_objects(py: Python<'_>, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {
    let n_objects = objects.len();
    let mut validity = BitmapBuilder::with_capacity(n_objects);
    for is_valid in objects.iter().map(|obj| !obj.inner.is_none(py)) {
        // SAFETY: capacity for `n_objects` bits was reserved above and exactly one bit is
        // pushed per object.
        unsafe { validity.push_unchecked(is_valid) };
    }
    let validity = validity.into_opt_validity();
    ObjectChunked::<ObjectValue>::new_from_vec_and_validity(name, objects, validity).into_series()
}
72
73#[pymethods]
74impl PySeries {
75    #[staticmethod]
76    fn new_bool(
77        py: Python<'_>,
78        name: &str,
79        array: &Bound<PyArray1<bool>>,
80        _strict: bool,
81    ) -> PyResult<Self> {
82        let array = array.readonly();
83
84        // We use raw ptr methods to read this as a u8 slice to work around PyO3/rust-numpy#509.
85        assert!(array.is_contiguous());
86        let data_ptr = array.data().cast::<u8>();
87        let data_len = array.len();
88        let vals = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
89        py.enter_polars_series(|| Series::new(name.into(), vals).cast(&DataType::Boolean))
90    }
91
92    #[staticmethod]
93    fn new_f16(
94        py: Python<'_>,
95        name: &str,
96        array: &Bound<PyArray1<pf16>>,
97        nan_is_null: bool,
98    ) -> PyResult<Self> {
99        let arr = numpy_array_to_arrow(array);
100        if nan_is_null {
101            py.enter_polars_series(|| {
102                let validity = polars_compute::nan::is_not_nan(arr.values());
103                Ok(Series::from_array(name.into(), arr.with_validity(validity)))
104            })
105        } else {
106            Ok(Series::from_array(name.into(), arr).into())
107        }
108    }
109
110    #[staticmethod]
111    fn new_f32(
112        py: Python<'_>,
113        name: &str,
114        array: &Bound<PyArray1<f32>>,
115        nan_is_null: bool,
116    ) -> PyResult<Self> {
117        let arr = numpy_array_to_arrow(array);
118        if nan_is_null {
119            py.enter_polars_series(|| {
120                let validity = polars_compute::nan::is_not_nan(arr.values());
121                Ok(Series::from_array(name.into(), arr.with_validity(validity)))
122            })
123        } else {
124            Ok(Series::from_array(name.into(), arr).into())
125        }
126    }
127
128    #[staticmethod]
129    fn new_f64(
130        py: Python<'_>,
131        name: &str,
132        array: &Bound<PyArray1<f64>>,
133        nan_is_null: bool,
134    ) -> PyResult<Self> {
135        let arr = numpy_array_to_arrow(array);
136        if nan_is_null {
137            py.enter_polars_series(|| {
138                let validity = polars_compute::nan::is_not_nan(arr.values());
139                Ok(Series::from_array(name.into(), arr.with_validity(validity)))
140            })
141        } else {
142            Ok(Series::from_array(name.into(), arr).into())
143        }
144    }
145}
146
147#[pymethods]
148impl PySeries {
149    #[staticmethod]
150    fn new_opt_bool(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
151        let len = values.len()?;
152        let mut builder = BooleanChunkedBuilder::new(name.into(), len);
153
154        for res in values.try_iter()? {
155            let value = res?;
156            if value.is_none() {
157                builder.append_null()
158            } else {
159                let v = value.extract::<bool>()?;
160                builder.append_value(v)
161            }
162        }
163
164        let ca = builder.finish();
165        let s = ca.into_series();
166        Ok(s.into())
167    }
168}
169
170fn new_primitive<'py, T, F>(
171    name: &str,
172    values: &Bound<'py, PyAny>,
173    _strict: bool,
174    extract: F,
175) -> PyResult<PySeries>
176where
177    T: PolarsNumericType,
178    F: Fn(Bound<'py, PyAny>) -> PyResult<T::Native>,
179{
180    let len = values.len()?;
181    let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
182
183    for res in values.try_iter()? {
184        let value = res?;
185        if value.is_none() {
186            builder.append_null()
187        } else {
188            let v = extract(value)?;
189            builder.append_value(v)
190        }
191    }
192
193    let ca = builder.finish();
194    let s = ca.into_series();
195    Ok(s.into())
196}
197
198// Init with lists that can contain Nones
199macro_rules! init_method_opt {
200    ($name:ident, $type:ty, $native: ty) => {
201        #[pymethods]
202        impl PySeries {
203            #[staticmethod]
204            fn $name(name: &str, obj: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
205                new_primitive::<$type, _>(name, obj, strict, |v| v.extract::<$native>())
206            }
207        }
208    };
209}
210
211init_method_opt!(new_opt_u8, UInt8Type, u8);
212init_method_opt!(new_opt_u16, UInt16Type, u16);
213init_method_opt!(new_opt_u32, UInt32Type, u32);
214init_method_opt!(new_opt_u64, UInt64Type, u64);
215init_method_opt!(new_opt_u128, UInt128Type, u128);
216init_method_opt!(new_opt_i8, Int8Type, i8);
217init_method_opt!(new_opt_i16, Int16Type, i16);
218init_method_opt!(new_opt_i32, Int32Type, i32);
219init_method_opt!(new_opt_i64, Int64Type, i64);
220init_method_opt!(new_opt_i128, Int128Type, i128);
221init_method_opt!(new_opt_f32, Float32Type, f32);
222init_method_opt!(new_opt_f64, Float64Type, f64);
223
224#[pymethods]
225impl PySeries {
226    #[staticmethod]
227    fn new_opt_f16(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
228        new_primitive::<Float16Type, _>(name, values, false, |v| {
229            Ok(AsPrimitive::<pf16>::as_(v.extract::<f64>()?))
230        })
231    }
232}
233
234fn convert_to_avs(
235    values: &Bound<'_, PyAny>,
236    strict: bool,
237    allow_object: bool,
238) -> PyResult<Vec<AnyValue<'static>>> {
239    values
240        .try_iter()?
241        .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, allow_object))
242        .collect()
243}
244
245#[pymethods]
246impl PySeries {
247    #[staticmethod]
248    fn new_from_any_values(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
249        let any_values_result = values
250            .try_iter()?
251            .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict, true))
252            .collect::<PyResult<Vec<AnyValue>>>();
253
254        let result = any_values_result.and_then(|avs| {
255            let s = Series::from_any_values(name.into(), avs.as_slice(), strict).map_err(|e| {
256                PyTypeError::new_err(format!(
257                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
258                ))
259            })?;
260            Ok(s.into())
261        });
262
263        // Fall back to Object type for non-strict construction.
264        if !strict && result.is_err() {
265            return Python::attach(|py| {
266                let objects = values
267                    .try_iter()?
268                    .map(|v| v?.extract())
269                    .collect::<PyResult<Vec<ObjectValue>>>()?;
270                Ok(Self::new_object(py, name, objects, strict))
271            });
272        }
273
274        result
275    }
276
277    #[staticmethod]
278    fn new_from_any_values_and_dtype(
279        name: &str,
280        values: &Bound<PyAny>,
281        dtype: Wrap<DataType>,
282        strict: bool,
283    ) -> PyResult<Self> {
284        let avs = convert_to_avs(values, strict, false)?;
285        let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
286            .map_err(|e| {
287                PyTypeError::new_err(format!(
288                    "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
289                ))
290            })?;
291        Ok(s.into())
292    }
293
294    #[staticmethod]
295    fn new_str(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
296        let len = values.len()?;
297        let mut builder = StringChunkedBuilder::new(name.into(), len);
298
299        for res in values.try_iter()? {
300            let value = res?;
301            if value.is_none() {
302                builder.append_null()
303            } else {
304                let v = value.extract::<Cow<str>>()?;
305                builder.append_value(v)
306            }
307        }
308
309        let ca = builder.finish();
310        let s = ca.into_series();
311        Ok(s.into())
312    }
313
314    #[staticmethod]
315    fn new_binary(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
316        let len = values.len()?;
317        let mut builder = BinaryChunkedBuilder::new(name.into(), len);
318
319        for res in values.try_iter()? {
320            let value = res?;
321            if value.is_none() {
322                builder.append_null()
323            } else {
324                let v = value.extract::<&[u8]>()?;
325                builder.append_value(v)
326            }
327        }
328
329        let ca = builder.finish();
330        let s = ca.into_series();
331        Ok(s.into())
332    }
333
334    #[staticmethod]
335    fn new_decimal(name: &str, values: &Bound<PyAny>, strict: bool) -> PyResult<Self> {
336        Self::new_from_any_values(name, values, strict)
337    }
338
339    #[staticmethod]
340    fn new_series_list(name: &str, values: Vec<Option<PySeries>>, _strict: bool) -> PyResult<Self> {
341        let series: Vec<_> = values
342            .into_iter()
343            .map(|ops| ops.map(|ps| ps.series.into_inner()))
344            .collect();
345        if let Some(s) = series.iter().flatten().next() {
346            if s.dtype().is_object() {
347                return Err(PyValueError::new_err(
348                    "list of objects isn't supported; try building a 'object' only series",
349                ));
350            }
351        }
352        Ok(Series::new(name.into(), series).into())
353    }
354
355    #[staticmethod]
356    #[pyo3(signature = (name, values, strict, dtype))]
357    fn new_array(
358        name: &str,
359        values: &Bound<PyAny>,
360        strict: bool,
361        dtype: Wrap<DataType>,
362    ) -> PyResult<Self> {
363        Self::new_from_any_values_and_dtype(name, values, dtype, strict)
364    }
365
366    #[staticmethod]
367    pub fn new_object(py: Python<'_>, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
368        #[cfg(feature = "object")]
369        {
370            PySeries::from(series_from_objects(py, name.into(), values))
371        }
372        #[cfg(not(feature = "object"))]
373        panic!("activate 'object' feature")
374    }
375
376    #[staticmethod]
377    fn new_null(name: &str, values: &Bound<PyAny>, _strict: bool) -> PyResult<Self> {
378        let len = values.len()?;
379        Ok(Series::new_null(name.into(), len).into())
380    }
381
382    #[staticmethod]
383    fn from_arrow(name: &str, array: &Bound<PyAny>) -> PyResult<Self> {
384        let arr = array_to_rust(array)?;
385
386        match arr.dtype() {
387            ArrowDataType::LargeList(_) => {
388                let array = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
389                let fast_explode = array.offsets().as_slice().windows(2).all(|w| w[0] != w[1]);
390
391                let mut out = ListChunked::with_chunk(name.into(), array.clone());
392                if fast_explode {
393                    out.set_fast_explode()
394                }
395                Ok(out.into_series().into())
396            },
397            _ => {
398                let series: Series =
399                    Series::try_new(name.into(), arr).map_err(PyPolarsErr::from)?;
400                Ok(series.into())
401            },
402        }
403    }
404}