Skip to main content

polars_python/interop/numpy/
to_numpy_series.rs

1use ndarray::IntoDimension;
2use num_traits::{Float, NumCast};
3use numpy::npyffi::flags;
4use numpy::{Element, PyArray1};
5use polars::prelude::*;
6use pyo3::IntoPyObjectExt;
7use pyo3::exceptions::PyRuntimeError;
8use pyo3::prelude::*;
9
10use super::to_numpy_df::df_to_numpy;
11use super::utils::{
12    create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13    reshape_numpy_array, series_contains_null,
14};
15use crate::conversion::ObjectValue;
16use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17use crate::interned;
18use crate::series::PySeries;
19
20#[pymethods]
21impl PySeries {
22    /// Convert this Series to a NumPy ndarray.
23    ///
24    /// This method copies data only when necessary. Set `allow_copy` to raise an error if copy
25    /// is required. Set `writable` to make sure the resulting array is writable, possibly requiring
26    /// copying the data.
27    fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {
28        series_to_numpy(py, &self.series.read(), writable, allow_copy)
29    }
30
31    /// Create a view of the data as a NumPy ndarray.
32    ///
33    /// WARNING: The resulting view will show the underlying value for nulls,
34    /// which may be any value. The caller is responsible for handling nulls
35    /// appropriately.
36    fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {
37        let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;
38        Some(view)
39    }
40}
41
42/// Convert a Series to a NumPy ndarray.
43pub(super) fn series_to_numpy(
44    py: Python<'_>,
45    s: &Series,
46    writable: bool,
47    allow_copy: bool,
48) -> PyResult<Py<PyAny>> {
49    if s.is_empty() {
50        // Take this path to ensure a writable array.
51        // This does not actually copy data for an empty Series.
52        return Ok(series_to_numpy_with_copy(py, s, true));
53    }
54    if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
55        if writable && !writable_flag {
56            if !allow_copy {
57                return Err(PyRuntimeError::new_err(
58                    "copy not allowed: cannot create a writable array without copying data",
59                ));
60            }
61            arr = arr.call_method0(py, interned::COPY.get(py))?;
62        }
63        return Ok(arr);
64    }
65
66    if !allow_copy {
67        return Err(PyRuntimeError::new_err(
68            "copy not allowed: cannot convert to a NumPy array without copying data",
69        ));
70    }
71
72    Ok(series_to_numpy_with_copy(py, s, writable))
73}
74
75/// Create a NumPy view of the given Series.
76fn try_series_to_numpy_view(
77    py: Python<'_>,
78    s: &Series,
79    allow_nulls: bool,
80    allow_rechunk: bool,
81) -> Option<(Py<PyAny>, bool)> {
82    if !dtype_supports_view(s.dtype()) {
83        return None;
84    }
85    if !allow_nulls && series_contains_null(s) {
86        return None;
87    }
88    let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
89    let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
90    Some((array, writable_flag))
91}
92
93/// Rechunk the Series if required.
94///
95/// NumPy arrays are always contiguous, so we may have to rechunk before creating a view.
96/// If we do so, we can flag the resulting array as writable.
97fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
98    let is_chunked = s.n_chunks() > 1;
99    match (is_chunked, allow_rechunk) {
100        (true, false) => None,
101        (true, true) => Some((py.detach(|| s.rechunk()), true)),
102        (false, _) => Some((s.clone(), false)),
103    }
104}
105
106/// Create a NumPy view of the given Series without checking for data types, chunks, or nulls.
107fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
108    debug_assert!(s.n_chunks() == 1);
109    match s.dtype() {
110        dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
111        DataType::Datetime(_, _) | DataType::Duration(_) => {
112            temporal_series_to_numpy_view(py, s, writable)
113        },
114        DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
115        _ => panic!("invalid data type"),
116    }
117}
118
119/// Create a NumPy view of a numeric Series.
120fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
121    let dims = [s.len()].into_dimension();
122    with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
123        let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
124        let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
125        let flags = if writable {
126            flags::NPY_ARRAY_FARRAY
127        } else {
128            flags::NPY_ARRAY_FARRAY_RO
129        };
130
131        let slice = ca.data_views().next().unwrap();
132
133        unsafe {
134            create_borrowed_np_array::<_>(
135                py,
136                np_dtype,
137                dims,
138                flags,
139                slice.as_ptr() as _,
140                PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
141            )
142        }
143    })
144}
145
146/// Create a NumPy view of a Datetime or Duration Series.
147fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
148    let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
149
150    let phys = s.to_physical_repr();
151    let ca = phys.i64().unwrap();
152    let slice = ca.data_views().next().unwrap();
153    let dims = [s.len()].into_dimension();
154    let flags = if writable {
155        flags::NPY_ARRAY_FARRAY
156    } else {
157        flags::NPY_ARRAY_FARRAY_RO
158    };
159
160    unsafe {
161        create_borrowed_np_array::<_>(
162            py,
163            np_dtype,
164            dims,
165            flags,
166            slice.as_ptr() as _,
167            PySeries::from(s).into_py_any(py).unwrap(), // Keep the Series memory alive.,
168        )
169    }
170}
171
172/// Create a NumPy view of an Array Series.
173fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
174    let ca = s.array().unwrap();
175    let s_inner = ca.get_inner();
176    let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
177
178    // Reshape to the original shape.
179    let DataType::Array(_, width) = s.dtype() else {
180        unreachable!()
181    };
182    reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
183}
184
185/// Convert a Series to a NumPy ndarray, copying data in the process.
186///
187/// This method will cast integers to floats so that `null = np.nan`.
188fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
189    use DataType::*;
190    match s.dtype() {
191        Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
192        Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
193        Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
194        Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
195        Int128 => {
196            let s = s.cast(&DataType::Float64).unwrap();
197            series_to_numpy(py, &s, writable, true).unwrap()
198        },
199        UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
200        UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
201        UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
202        UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
203        UInt128 => {
204            let s = s.cast(&DataType::Float64).unwrap();
205            series_to_numpy(py, &s, writable, true).unwrap()
206        },
207        Float16 => numeric_series_to_numpy::<Float16Type, pf16>(py, s),
208        Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
209        Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
210        Boolean => boolean_series_to_numpy(py, s),
211        Date => date_series_to_numpy(py, s),
212        Datetime(tu, _) => {
213            use numpy::datetime::{Datetime, units};
214            match tu {
215                TimeUnit::Milliseconds => {
216                    temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
217                },
218                TimeUnit::Microseconds => {
219                    temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
220                },
221                TimeUnit::Nanoseconds => {
222                    temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
223                },
224            }
225        },
226        Duration(tu) => {
227            use numpy::datetime::{Timedelta, units};
228            match tu {
229                TimeUnit::Milliseconds => {
230                    temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
231                },
232                TimeUnit::Microseconds => {
233                    temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
234                },
235                TimeUnit::Nanoseconds => {
236                    temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
237                },
238            }
239        },
240        Time => {
241            let ca = s.time().unwrap();
242            let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
243            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
244        },
245        String => {
246            let ca = s.str().unwrap();
247            let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
248            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
249        },
250        Binary => {
251            let ca = s.binary().unwrap();
252            let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
253            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
254        },
255        Categorical(_, _) | Enum(_, _) => {
256            with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
257                let ca = s.cat::<$C>().unwrap();
258                let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
259                PyArray1::from_iter(py, values).into_py_any(py).unwrap()
260            })
261        },
262        Decimal(_, _) => {
263            let ca = s.decimal().unwrap();
264            let values = decimal_to_pyobject_iter(py, ca)
265                .unwrap()
266                .map(|v| v.into_py_any(py).unwrap());
267            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
268        },
269        List(_) => list_series_to_numpy(py, s, writable),
270        Array(_, _) => array_series_to_numpy(py, s, writable),
271        Struct(_) => {
272            let ca = s.struct_().unwrap();
273            let df = ca.clone().unnest();
274            df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
275        },
276        #[cfg(feature = "object")]
277        Object(_) => {
278            let ca = s
279                .as_any()
280                .downcast_ref::<ObjectChunked<ObjectValue>>()
281                .unwrap();
282            let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
283            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
284        },
285        Null => {
286            let n = s.len();
287            let values = std::iter::repeat_n(f32::NAN, n);
288            PyArray1::from_iter(py, values).into_py_any(py).unwrap()
289        },
290        Extension(_, _) => series_to_numpy_with_copy(py, s.ext().unwrap().storage(), writable),
291        Unknown(_) | BinaryOffset => unreachable!(),
292    }
293}
294
295/// Convert numeric types to f32 or f64 with NaN representing a null value.
296fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>
297where
298    T: PolarsNumericType,
299    T::Native: numpy::Element,
300    U: Float + numpy::Element,
301{
302    let ca: &ChunkedArray<T> = s.as_ref().as_ref();
303    if s.null_count() == 0 {
304        let values = ca.into_no_null_iter();
305        PyArray1::<T::Native>::from_iter(py, values)
306            .into_py_any(py)
307            .unwrap()
308    } else {
309        let mapper = |opt_v: Option<T::Native>| match opt_v {
310            Some(v) => NumCast::from(v).unwrap(),
311            None => U::nan(),
312        };
313        let values = ca.iter().map(mapper);
314        PyArray1::from_iter(py, values).into_py_any(py).unwrap()
315    }
316}
317
318/// Convert booleans to u8 if no nulls are present, otherwise convert to objects.
319fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
320    let ca = s.bool().unwrap();
321    if s.null_count() == 0 {
322        let values = ca.no_null_iter();
323        PyArray1::<bool>::from_iter(py, values)
324            .into_py_any(py)
325            .unwrap()
326    } else {
327        let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
328        PyArray1::from_iter(py, values).into_py_any(py).unwrap()
329    }
330}
331
332/// Convert dates directly to i64 with i64::MIN representing a null value.
333fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
334    use numpy::datetime::{Datetime, units};
335
336    let s_phys = s.to_physical_repr();
337    let ca = s_phys.i32().unwrap();
338
339    if s.null_count() == 0 {
340        let mapper = |v: i32| (v as i64).into();
341        let values = ca.into_no_null_iter().map(mapper);
342        PyArray1::<Datetime<units::Days>>::from_iter(py, values)
343            .into_py_any(py)
344            .unwrap()
345    } else {
346        let mapper = |opt_v: Option<i32>| {
347            match opt_v {
348                Some(v) => v as i64,
349                None => i64::MIN,
350            }
351            .into()
352        };
353        let values = ca.iter().map(mapper);
354        PyArray1::<Datetime<units::Days>>::from_iter(py, values)
355            .into_py_any(py)
356            .unwrap()
357    }
358}
359
360/// Convert datetimes and durations with i64::MIN representing a null value.
361fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>
362where
363    T: From<i64> + numpy::Element,
364{
365    let s_phys = s.to_physical_repr();
366    let ca = s_phys.i64().unwrap();
367    let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
368    PyArray1::<T>::from_iter(py, values)
369        .into_py_any(py)
370        .unwrap()
371}
372fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
373    let ca = s.list().unwrap();
374
375    let iter = ca.amortized_iter().map(|opt_s| match opt_s {
376        None => py.None(),
377        Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
378    });
379    PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
380}
381
382/// Convert arrays by flattening first, converting the flat Series, and then reshaping.
383fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
384    let ca = s.array().unwrap();
385    let s_inner = ca.get_inner();
386    let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
387
388    // Reshape to the original shape.
389    let DataType::Array(_, width) = s.dtype() else {
390        unreachable!()
391    };
392    reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
393}