Skip to main content

polars_python/interop/numpy/
to_numpy_series.rs

1use ndarray::IntoDimension;
2use num_traits::{Float, NumCast};
3use numpy::npyffi::flags;
4use numpy::{Element, PyArray1};
5use polars::prelude::*;
6use pyo3::exceptions::PyRuntimeError;
7use pyo3::prelude::*;
8use pyo3::{IntoPyObjectExt, intern};
9
10use super::to_numpy_df::df_to_numpy;
11use super::utils::{
12    create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13    reshape_numpy_array, series_contains_null,
14};
15use crate::conversion::ObjectValue;
16use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17use crate::series::PySeries;
18
19#[pymethods]
20impl PySeries {
21    /// Convert this Series to a NumPy ndarray.
22    ///
23    /// This method copies data only when necessary. Set `allow_copy` to raise an error if copy
24    /// is required. Set `writable` to make sure the resulting array is writable, possibly requiring
25    /// copying the data.
26    fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {
27        series_to_numpy(py, &self.series.read(), writable, allow_copy)
28    }
29
30    /// Create a view of the data as a NumPy ndarray.
31    ///
32    /// WARNING: The resulting view will show the underlying value for nulls,
33    /// which may be any value. The caller is responsible for handling nulls
34    /// appropriately.
35    fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {
36        let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;
37        Some(view)
38    }
39}
40
41/// Convert a Series to a NumPy ndarray.
42pub(super) fn series_to_numpy(
43    py: Python<'_>,
44    s: &Series,
45    writable: bool,
46    allow_copy: bool,
47) -> PyResult<Py<PyAny>> {
48    if s.is_empty() {
49        // Take this path to ensure a writable array.
50        // This does not actually copy data for an empty Series.
51        return Ok(series_to_numpy_with_copy(py, s, true));
52    }
53    if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
54        if writable && !writable_flag {
55            if !allow_copy {
56                return Err(PyRuntimeError::new_err(
57                    "copy not allowed: cannot create a writable array without copying data",
58                ));
59            }
60            arr = arr.call_method0(py, intern!(py, "copy"))?;
61        }
62        return Ok(arr);
63    }
64
65    if !allow_copy {
66        return Err(PyRuntimeError::new_err(
67            "copy not allowed: cannot convert to a NumPy array without copying data",
68        ));
69    }
70
71    Ok(series_to_numpy_with_copy(py, s, writable))
72}
73
74/// Create a NumPy view of the given Series.
75fn try_series_to_numpy_view(
76    py: Python<'_>,
77    s: &Series,
78    allow_nulls: bool,
79    allow_rechunk: bool,
80) -> Option<(Py<PyAny>, bool)> {
81    if !dtype_supports_view(s.dtype()) {
82        return None;
83    }
84    if !allow_nulls && series_contains_null(s) {
85        return None;
86    }
87    let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
88    let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
89    Some((array, writable_flag))
90}
91
92/// Rechunk the Series if required.
93///
94/// NumPy arrays are always contiguous, so we may have to rechunk before creating a view.
95/// If we do so, we can flag the resulting array as writable.
96fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
97    let is_chunked = s.n_chunks() > 1;
98    match (is_chunked, allow_rechunk) {
99        (true, false) => None,
100        (true, true) => Some((py.detach(|| s.rechunk()), true)),
101        (false, _) => Some((s.clone(), false)),
102    }
103}
104
105/// Create a NumPy view of the given Series without checking for data types, chunks, or nulls.
106fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
107    debug_assert!(s.n_chunks() == 1);
108    match s.dtype() {
109        dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
110        DataType::Datetime(_, _) | DataType::Duration(_) => {
111            temporal_series_to_numpy_view(py, s, writable)
112        },
113        DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
114        _ => panic!("invalid data type"),
115    }
116}
117
118/// Create a NumPy view of a numeric Series.
119fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
120    let dims = [s.len()].into_dimension();
121    with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
122        let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
123        let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
124        let flags = if writable {
125            flags::NPY_ARRAY_FARRAY
126        } else {
127            flags::NPY_ARRAY_FARRAY_RO
128        };
129
130        let slice = ca.data_views().next().unwrap();
131
132        unsafe {
133            create_borrowed_np_array::<_>(
134                py,
135                np_dtype,
136                dims,
137                flags,
138                slice.as_ptr() as _,
139                PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
140            )
141        }
142    })
143}
144
145/// Create a NumPy view of a Datetime or Duration Series.
146fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
147    let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
148
149    let phys = s.to_physical_repr();
150    let ca = phys.i64().unwrap();
151    let slice = ca.data_views().next().unwrap();
152    let dims = [s.len()].into_dimension();
153    let flags = if writable {
154        flags::NPY_ARRAY_FARRAY
155    } else {
156        flags::NPY_ARRAY_FARRAY_RO
157    };
158
159    unsafe {
160        create_borrowed_np_array::<_>(
161            py,
162            np_dtype,
163            dims,
164            flags,
165            slice.as_ptr() as _,
166            PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
167        )
168    }
169}
170
171/// Create a NumPy view of an Array Series.
172fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
173    let ca = s.array().unwrap();
174    let s_inner = ca.get_inner();
175    let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
176
177    // Reshape to the original shape.
178    let DataType::Array(_, width) = s.dtype() else {
179        unreachable!()
180    };
181    reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
182}
183
184/// Convert a Series to a NumPy ndarray, copying data in the process.
185///
186/// This method will cast integers to floats so that `null = np.nan`.
187fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
188    use DataType::*;
189    match s.dtype() {
190        Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
191        Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
192        Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
193        Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
194        Int128 => {
195            let s = s.cast(&DataType::Float64).unwrap();
196            series_to_numpy(py, &s, writable, true).unwrap()
197        },
198        UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
199        UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
200        UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
201        UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
202        UInt128 => {
203            let s = s.cast(&DataType::Float64).unwrap();
204            series_to_numpy(py, &s, writable, true).unwrap()
205        },
206        Float16 => numeric_series_to_numpy::<Float16Type, pf16>(py, s),
207        Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
208        Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
209        Boolean => boolean_series_to_numpy(py, s),
210        Date => date_series_to_numpy(py, s),
211        Datetime(tu, _) => {
212            use numpy::datetime::{Datetime, units};
213            match tu {
214                TimeUnit::Milliseconds => {
215                    temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
216                },
217                TimeUnit::Microseconds => {
218                    temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
219                },
220                TimeUnit::Nanoseconds => {
221                    temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
222                },
223            }
224        },
225        Duration(tu) => {
226            use numpy::datetime::{Timedelta, units};
227            match tu {
228                TimeUnit::Milliseconds => {
229                    temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
230                },
231                TimeUnit::Microseconds => {
232                    temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
233                },
234                TimeUnit::Nanoseconds => {
235                    temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
236                },
237            }
238        },
239        Time => {
240            let ca = s.time().unwrap();
241            let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
242            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
243        },
244        String => {
245            let ca = s.str().unwrap();
246            let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
247            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
248        },
249        Binary => {
250            let ca = s.binary().unwrap();
251            let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
252            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
253        },
254        Categorical(_, _) | Enum(_, _) => {
255            with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
256                let ca = s.cat::<$C>().unwrap();
257                let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
258                PyArray1::from_iter(py, values).into_py_any(py).unwrap()
259            })
260        },
261        Decimal(_, _) => {
262            let ca = s.decimal().unwrap();
263            let values = decimal_to_pyobject_iter(py, ca)
264                .unwrap()
265                .map(|v| v.into_py_any(py).unwrap());
266            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
267        },
268        List(_) => list_series_to_numpy(py, s, writable),
269        Array(_, _) => array_series_to_numpy(py, s, writable),
270        Struct(_) => {
271            let ca = s.struct_().unwrap();
272            let df = ca.clone().unnest();
273            df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
274        },
275        #[cfg(feature = "object")]
276        Object(_) => {
277            let ca = s
278                .as_any()
279                .downcast_ref::<ObjectChunked<ObjectValue>>()
280                .unwrap();
281            let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
282            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
283        },
284        Null => {
285            let n = s.len();
286            let values = std::iter::repeat_n(f32::NAN, n);
287            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
288        },
289        Extension(_, _) => series_to_numpy_with_copy(py, s.ext().unwrap().storage(), writable),
290        Unknown(_) | BinaryOffset => unreachable!(),
291    }
292}
293
294/// Convert numeric types to f32 or f64 with NaN representing a null value.
295fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>
296where
297    T: PolarsNumericType,
298    T::Native: numpy::Element,
299    U: Float + numpy::Element,
300{
301    let ca: &ChunkedArray<T> = s.as_ref().as_ref();
302    if s.null_count() == 0 {
303        let values = ca.into_no_null_iter();
304        PyArray1::<T::Native>::from_iter(py, values)
305            .into_py_any(py)
306            .unwrap()
307    } else {
308        let mapper = |opt_v: Option<T::Native>| match opt_v {
309            Some(v) => NumCast::from(v).unwrap(),
310            None => U::nan(),
311        };
312        let values = ca.iter().map(mapper);
313        PyArray1::from_iter(py, values).into_py_any(py).unwrap()
314    }
315}
316
317/// Convert booleans to u8 if no nulls are present, otherwise convert to objects.
318fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
319    let ca = s.bool().unwrap();
320    if s.null_count() == 0 {
321        let values = ca.into_no_null_iter();
322        PyArray1::<bool>::from_iter(py, values)
323            .into_py_any(py)
324            .unwrap()
325    } else {
326        let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
327        PyArray1::from_iter(py, values).into_py_any(py).unwrap()
328    }
329}
330
331/// Convert dates directly to i64 with i64::MIN representing a null value.
332fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
333    use numpy::datetime::{Datetime, units};
334
335    let s_phys = s.to_physical_repr();
336    let ca = s_phys.i32().unwrap();
337
338    if s.null_count() == 0 {
339        let mapper = |v: i32| (v as i64).into();
340        let values = ca.into_no_null_iter().map(mapper);
341        PyArray1::<Datetime<units::Days>>::from_iter(py, values)
342            .into_py_any(py)
343            .unwrap()
344    } else {
345        let mapper = |opt_v: Option<i32>| {
346            match opt_v {
347                Some(v) => v as i64,
348                None => i64::MIN,
349            }
350            .into()
351        };
352        let values = ca.iter().map(mapper);
353        PyArray1::<Datetime<units::Days>>::from_iter(py, values)
354            .into_py_any(py)
355            .unwrap()
356    }
357}
358
359/// Convert datetimes and durations with i64::MIN representing a null value.
360fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>
361where
362    T: From<i64> + numpy::Element,
363{
364    let s_phys = s.to_physical_repr();
365    let ca = s_phys.i64().unwrap();
366    let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
367    PyArray1::<T>::from_iter(py, values)
368        .into_py_any(py)
369        .unwrap()
370}
371fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
372    let ca = s.list().unwrap();
373
374    let iter = ca.amortized_iter().map(|opt_s| match opt_s {
375        None => py.None(),
376        Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
377    });
378    PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
379}
380
381/// Convert arrays by flattening first, converting the flat Series, and then reshaping.
382fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
383    let ca = s.array().unwrap();
384    let s_inner = ca.get_inner();
385    let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
386
387    // Reshape to the original shape.
388    let DataType::Array(_, width) = s.dtype() else {
389        unreachable!()
390    };
391    reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
392}