1use ndarray::IntoDimension;
2use num_traits::{Float, NumCast};
3use numpy::npyffi::flags;
4use numpy::{Element, PyArray1};
5use polars_core::prelude::*;
6use pyo3::exceptions::PyRuntimeError;
7use pyo3::prelude::*;
8use pyo3::{IntoPyObjectExt, intern};
9
10use super::to_numpy_df::df_to_numpy;
11use super::utils::{
12 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13 reshape_numpy_array, series_contains_null,
14};
15use crate::conversion::ObjectValue;
16use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17use crate::series::PySeries;
18
19#[pymethods]
20impl PySeries {
21 fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<PyObject> {
27 series_to_numpy(py, &self.series, writable, allow_copy)
28 }
29
30 fn to_numpy_view(&self, py: Python) -> Option<PyObject> {
36 let (view, _) = try_series_to_numpy_view(py, &self.series, true, false)?;
37 Some(view)
38 }
39}
40
41pub(super) fn series_to_numpy(
43 py: Python<'_>,
44 s: &Series,
45 writable: bool,
46 allow_copy: bool,
47) -> PyResult<PyObject> {
48 if s.is_empty() {
49 return Ok(series_to_numpy_with_copy(py, s, true));
52 }
53 if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
54 if writable && !writable_flag {
55 if !allow_copy {
56 return Err(PyRuntimeError::new_err(
57 "copy not allowed: cannot create a writable array without copying data",
58 ));
59 }
60 arr = arr.call_method0(py, intern!(py, "copy"))?;
61 }
62 return Ok(arr);
63 }
64
65 if !allow_copy {
66 return Err(PyRuntimeError::new_err(
67 "copy not allowed: cannot convert to a NumPy array without copying data",
68 ));
69 }
70
71 Ok(series_to_numpy_with_copy(py, s, writable))
72}
73
74fn try_series_to_numpy_view(
76 py: Python<'_>,
77 s: &Series,
78 allow_nulls: bool,
79 allow_rechunk: bool,
80) -> Option<(PyObject, bool)> {
81 if !dtype_supports_view(s.dtype()) {
82 return None;
83 }
84 if !allow_nulls && series_contains_null(s) {
85 return None;
86 }
87 let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
88 let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
89 Some((array, writable_flag))
90}
91
92fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
97 let is_chunked = s.n_chunks() > 1;
98 match (is_chunked, allow_rechunk) {
99 (true, false) => None,
100 (true, true) => Some((py.allow_threads(|| s.rechunk()), true)),
101 (false, _) => Some((s.clone(), false)),
102 }
103}
104
105fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> PyObject {
107 debug_assert!(s.n_chunks() == 1);
108 match s.dtype() {
109 dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
110 DataType::Datetime(_, _) | DataType::Duration(_) => {
111 temporal_series_to_numpy_view(py, s, writable)
112 },
113 DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
114 _ => panic!("invalid data type"),
115 }
116}
117
118fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> PyObject {
120 let dims = [s.len()].into_dimension();
121 with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
122 let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
123 let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
124 let flags = if writable {
125 flags::NPY_ARRAY_FARRAY
126 } else {
127 flags::NPY_ARRAY_FARRAY_RO
128 };
129
130 let slice = ca.data_views().next().unwrap();
131
132 unsafe {
133 create_borrowed_np_array::<_>(
134 py,
135 np_dtype,
136 dims,
137 flags,
138 slice.as_ptr() as _,
139 PySeries::from(s).into_py_any(py).unwrap(), )
141 }
142 })
143}
144
145fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> PyObject {
147 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
148
149 let phys = s.to_physical_repr();
150 let ca = phys.i64().unwrap();
151 let slice = ca.data_views().next().unwrap();
152 let dims = [s.len()].into_dimension();
153 let flags = if writable {
154 flags::NPY_ARRAY_FARRAY
155 } else {
156 flags::NPY_ARRAY_FARRAY_RO
157 };
158
159 unsafe {
160 create_borrowed_np_array::<_>(
161 py,
162 np_dtype,
163 dims,
164 flags,
165 slice.as_ptr() as _,
166 PySeries::from(s).into_py_any(py).unwrap(), )
168 }
169}
170
171fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> PyObject {
173 let ca = s.array().unwrap();
174 let s_inner = ca.get_inner();
175 let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
176
177 let DataType::Array(_, width) = s.dtype() else {
179 unreachable!()
180 };
181 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
182}
183
184fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> PyObject {
188 use DataType::*;
189 match s.dtype() {
190 Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
191 Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
192 Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
193 Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
194 Int128 => {
195 let s = s.cast(&DataType::Float64).unwrap();
196 series_to_numpy(py, &s, writable, true).unwrap()
197 },
198 UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
199 UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
200 UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
201 UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
202 Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
203 Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
204 Boolean => boolean_series_to_numpy(py, s),
205 Date => date_series_to_numpy(py, s),
206 Datetime(tu, _) => {
207 use numpy::datetime::{Datetime, units};
208 match tu {
209 TimeUnit::Milliseconds => {
210 temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
211 },
212 TimeUnit::Microseconds => {
213 temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
214 },
215 TimeUnit::Nanoseconds => {
216 temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
217 },
218 }
219 },
220 Duration(tu) => {
221 use numpy::datetime::{Timedelta, units};
222 match tu {
223 TimeUnit::Milliseconds => {
224 temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
225 },
226 TimeUnit::Microseconds => {
227 temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
228 },
229 TimeUnit::Nanoseconds => {
230 temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
231 },
232 }
233 },
234 Time => {
235 let ca = s.time().unwrap();
236 let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
237 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
238 },
239 String => {
240 let ca = s.str().unwrap();
241 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
242 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
243 },
244 Binary => {
245 let ca = s.binary().unwrap();
246 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
247 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
248 },
249 Categorical(_, _) | Enum(_, _) => {
250 let ca = s.categorical().unwrap();
251 let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
252 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
253 },
254 Decimal(_, _) => {
255 let ca = s.decimal().unwrap();
256 let values = decimal_to_pyobject_iter(py, ca)
257 .unwrap()
258 .map(|v| v.into_py_any(py).unwrap());
259 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
260 },
261 List(_) => list_series_to_numpy(py, s, writable),
262 Array(_, _) => array_series_to_numpy(py, s, writable),
263 Struct(_) => {
264 let ca = s.struct_().unwrap();
265 let df = ca.clone().unnest();
266 df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
267 },
268 #[cfg(feature = "object")]
269 Object(_) => {
270 let ca = s
271 .as_any()
272 .downcast_ref::<ObjectChunked<ObjectValue>>()
273 .unwrap();
274 let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
275 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
276 },
277 Null => {
278 let n = s.len();
279 let values = std::iter::repeat_n(f32::NAN, n);
280 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
281 },
282 Unknown(_) | BinaryOffset => unreachable!(),
283 }
284}
285
286fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> PyObject
288where
289 T: PolarsNumericType,
290 T::Native: numpy::Element,
291 U: Float + numpy::Element,
292{
293 let ca: &ChunkedArray<T> = s.as_ref().as_ref();
294 if s.null_count() == 0 {
295 let values = ca.into_no_null_iter();
296 PyArray1::<T::Native>::from_iter(py, values)
297 .into_py_any(py)
298 .unwrap()
299 } else {
300 let mapper = |opt_v: Option<T::Native>| match opt_v {
301 Some(v) => NumCast::from(v).unwrap(),
302 None => U::nan(),
303 };
304 let values = ca.iter().map(mapper);
305 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
306 }
307}
308
309fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> PyObject {
311 let ca = s.bool().unwrap();
312 if s.null_count() == 0 {
313 let values = ca.into_no_null_iter();
314 PyArray1::<bool>::from_iter(py, values)
315 .into_py_any(py)
316 .unwrap()
317 } else {
318 let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
319 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
320 }
321}
322
323fn date_series_to_numpy(py: Python<'_>, s: &Series) -> PyObject {
325 use numpy::datetime::{Datetime, units};
326
327 let s_phys = s.to_physical_repr();
328 let ca = s_phys.i32().unwrap();
329
330 if s.null_count() == 0 {
331 let mapper = |v: i32| (v as i64).into();
332 let values = ca.into_no_null_iter().map(mapper);
333 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
334 .into_py_any(py)
335 .unwrap()
336 } else {
337 let mapper = |opt_v: Option<i32>| {
338 match opt_v {
339 Some(v) => v as i64,
340 None => i64::MIN,
341 }
342 .into()
343 };
344 let values = ca.iter().map(mapper);
345 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
346 .into_py_any(py)
347 .unwrap()
348 }
349}
350
351fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> PyObject
353where
354 T: From<i64> + numpy::Element,
355{
356 let s_phys = s.to_physical_repr();
357 let ca = s_phys.i64().unwrap();
358 let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
359 PyArray1::<T>::from_iter(py, values)
360 .into_py_any(py)
361 .unwrap()
362}
363fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> PyObject {
364 let ca = s.list().unwrap();
365
366 let iter = ca.amortized_iter().map(|opt_s| match opt_s {
367 None => py.None(),
368 Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
369 });
370 PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
371}
372
373fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> PyObject {
375 let ca = s.array().unwrap();
376 let s_inner = ca.get_inner();
377 let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
378
379 let DataType::Array(_, width) = s.dtype() else {
381 unreachable!()
382 };
383 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
384}