1use ndarray::IntoDimension;
2use num_traits::{Float, NumCast};
3use numpy::npyffi::flags;
4use numpy::{Element, PyArray1};
5use polars_core::prelude::*;
6use pyo3::exceptions::PyRuntimeError;
7use pyo3::prelude::*;
8use pyo3::{IntoPyObjectExt, intern};
9
10use super::to_numpy_df::df_to_numpy;
11use super::utils::{
12 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13 reshape_numpy_array, series_contains_null,
14};
15use crate::conversion::ObjectValue;
16use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17use crate::series::PySeries;
18
19#[pymethods]
20impl PySeries {
21 fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {
27 series_to_numpy(py, &self.series.read(), writable, allow_copy)
28 }
29
30 fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {
36 let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;
37 Some(view)
38 }
39}
40
41pub(super) fn series_to_numpy(
43 py: Python<'_>,
44 s: &Series,
45 writable: bool,
46 allow_copy: bool,
47) -> PyResult<Py<PyAny>> {
48 if s.is_empty() {
49 return Ok(series_to_numpy_with_copy(py, s, true));
52 }
53 if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
54 if writable && !writable_flag {
55 if !allow_copy {
56 return Err(PyRuntimeError::new_err(
57 "copy not allowed: cannot create a writable array without copying data",
58 ));
59 }
60 arr = arr.call_method0(py, intern!(py, "copy"))?;
61 }
62 return Ok(arr);
63 }
64
65 if !allow_copy {
66 return Err(PyRuntimeError::new_err(
67 "copy not allowed: cannot convert to a NumPy array without copying data",
68 ));
69 }
70
71 Ok(series_to_numpy_with_copy(py, s, writable))
72}
73
74fn try_series_to_numpy_view(
76 py: Python<'_>,
77 s: &Series,
78 allow_nulls: bool,
79 allow_rechunk: bool,
80) -> Option<(Py<PyAny>, bool)> {
81 if !dtype_supports_view(s.dtype()) {
82 return None;
83 }
84 if !allow_nulls && series_contains_null(s) {
85 return None;
86 }
87 let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
88 let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
89 Some((array, writable_flag))
90}
91
92fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
97 let is_chunked = s.n_chunks() > 1;
98 match (is_chunked, allow_rechunk) {
99 (true, false) => None,
100 (true, true) => Some((py.detach(|| s.rechunk()), true)),
101 (false, _) => Some((s.clone(), false)),
102 }
103}
104
105fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
107 debug_assert!(s.n_chunks() == 1);
108 match s.dtype() {
109 dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
110 DataType::Datetime(_, _) | DataType::Duration(_) => {
111 temporal_series_to_numpy_view(py, s, writable)
112 },
113 DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
114 _ => panic!("invalid data type"),
115 }
116}
117
118fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
120 let dims = [s.len()].into_dimension();
121 with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
122 let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
123 let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
124 let flags = if writable {
125 flags::NPY_ARRAY_FARRAY
126 } else {
127 flags::NPY_ARRAY_FARRAY_RO
128 };
129
130 let slice = ca.data_views().next().unwrap();
131
132 unsafe {
133 create_borrowed_np_array::<_>(
134 py,
135 np_dtype,
136 dims,
137 flags,
138 slice.as_ptr() as _,
139 PySeries::from(s).into_py_any(py).unwrap(), )
141 }
142 })
143}
144
145fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
147 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
148
149 let phys = s.to_physical_repr();
150 let ca = phys.i64().unwrap();
151 let slice = ca.data_views().next().unwrap();
152 let dims = [s.len()].into_dimension();
153 let flags = if writable {
154 flags::NPY_ARRAY_FARRAY
155 } else {
156 flags::NPY_ARRAY_FARRAY_RO
157 };
158
159 unsafe {
160 create_borrowed_np_array::<_>(
161 py,
162 np_dtype,
163 dims,
164 flags,
165 slice.as_ptr() as _,
166 PySeries::from(s).into_py_any(py).unwrap(), )
168 }
169}
170
171fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
173 let ca = s.array().unwrap();
174 let s_inner = ca.get_inner();
175 let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
176
177 let DataType::Array(_, width) = s.dtype() else {
179 unreachable!()
180 };
181 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
182}
183
184fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
188 use DataType::*;
189 match s.dtype() {
190 Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
191 Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
192 Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
193 Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
194 Int128 => {
195 let s = s.cast(&DataType::Float64).unwrap();
196 series_to_numpy(py, &s, writable, true).unwrap()
197 },
198 UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
199 UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
200 UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
201 UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
202 UInt128 => {
203 let s = s.cast(&DataType::Float64).unwrap();
204 series_to_numpy(py, &s, writable, true).unwrap()
205 },
206 Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
207 Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
208 Boolean => boolean_series_to_numpy(py, s),
209 Date => date_series_to_numpy(py, s),
210 Datetime(tu, _) => {
211 use numpy::datetime::{Datetime, units};
212 match tu {
213 TimeUnit::Milliseconds => {
214 temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
215 },
216 TimeUnit::Microseconds => {
217 temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
218 },
219 TimeUnit::Nanoseconds => {
220 temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
221 },
222 }
223 },
224 Duration(tu) => {
225 use numpy::datetime::{Timedelta, units};
226 match tu {
227 TimeUnit::Milliseconds => {
228 temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
229 },
230 TimeUnit::Microseconds => {
231 temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
232 },
233 TimeUnit::Nanoseconds => {
234 temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
235 },
236 }
237 },
238 Time => {
239 let ca = s.time().unwrap();
240 let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
241 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
242 },
243 String => {
244 let ca = s.str().unwrap();
245 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
246 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
247 },
248 Binary => {
249 let ca = s.binary().unwrap();
250 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
251 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
252 },
253 Categorical(_, _) | Enum(_, _) => {
254 with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
255 let ca = s.cat::<$C>().unwrap();
256 let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
257 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
258 })
259 },
260 Decimal(_, _) => {
261 let ca = s.decimal().unwrap();
262 let values = decimal_to_pyobject_iter(py, ca)
263 .unwrap()
264 .map(|v| v.into_py_any(py).unwrap());
265 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
266 },
267 List(_) => list_series_to_numpy(py, s, writable),
268 Array(_, _) => array_series_to_numpy(py, s, writable),
269 Struct(_) => {
270 let ca = s.struct_().unwrap();
271 let df = ca.clone().unnest();
272 df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
273 },
274 #[cfg(feature = "object")]
275 Object(_) => {
276 let ca = s
277 .as_any()
278 .downcast_ref::<ObjectChunked<ObjectValue>>()
279 .unwrap();
280 let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
281 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
282 },
283 Null => {
284 let n = s.len();
285 let values = std::iter::repeat_n(f32::NAN, n);
286 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
287 },
288 Unknown(_) | BinaryOffset => unreachable!(),
289 }
290}
291
292fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>
294where
295 T: PolarsNumericType,
296 T::Native: numpy::Element,
297 U: Float + numpy::Element,
298{
299 let ca: &ChunkedArray<T> = s.as_ref().as_ref();
300 if s.null_count() == 0 {
301 let values = ca.into_no_null_iter();
302 PyArray1::<T::Native>::from_iter(py, values)
303 .into_py_any(py)
304 .unwrap()
305 } else {
306 let mapper = |opt_v: Option<T::Native>| match opt_v {
307 Some(v) => NumCast::from(v).unwrap(),
308 None => U::nan(),
309 };
310 let values = ca.iter().map(mapper);
311 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
312 }
313}
314
315fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
317 let ca = s.bool().unwrap();
318 if s.null_count() == 0 {
319 let values = ca.into_no_null_iter();
320 PyArray1::<bool>::from_iter(py, values)
321 .into_py_any(py)
322 .unwrap()
323 } else {
324 let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
325 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
326 }
327}
328
329fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
331 use numpy::datetime::{Datetime, units};
332
333 let s_phys = s.to_physical_repr();
334 let ca = s_phys.i32().unwrap();
335
336 if s.null_count() == 0 {
337 let mapper = |v: i32| (v as i64).into();
338 let values = ca.into_no_null_iter().map(mapper);
339 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
340 .into_py_any(py)
341 .unwrap()
342 } else {
343 let mapper = |opt_v: Option<i32>| {
344 match opt_v {
345 Some(v) => v as i64,
346 None => i64::MIN,
347 }
348 .into()
349 };
350 let values = ca.iter().map(mapper);
351 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
352 .into_py_any(py)
353 .unwrap()
354 }
355}
356
357fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>
359where
360 T: From<i64> + numpy::Element,
361{
362 let s_phys = s.to_physical_repr();
363 let ca = s_phys.i64().unwrap();
364 let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
365 PyArray1::<T>::from_iter(py, values)
366 .into_py_any(py)
367 .unwrap()
368}
369fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
370 let ca = s.list().unwrap();
371
372 let iter = ca.amortized_iter().map(|opt_s| match opt_s {
373 None => py.None(),
374 Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
375 });
376 PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
377}
378
379fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
381 let ca = s.array().unwrap();
382 let s_inner = ca.get_inner();
383 let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
384
385 let DataType::Array(_, width) = s.dtype() else {
387 unreachable!()
388 };
389 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
390}