1use ndarray::IntoDimension;
2use num_traits::{Float, NumCast};
3use numpy::npyffi::flags;
4use numpy::{Element, PyArray1};
5use polars::prelude::*;
6use pyo3::IntoPyObjectExt;
7use pyo3::exceptions::PyRuntimeError;
8use pyo3::prelude::*;
9
10use super::to_numpy_df::df_to_numpy;
11use super::utils::{
12 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
13 reshape_numpy_array, series_contains_null,
14};
15use crate::conversion::ObjectValue;
16use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter};
17use crate::interned;
18use crate::series::PySeries;
19
20#[pymethods]
21impl PySeries {
22 fn to_numpy(&self, py: Python<'_>, writable: bool, allow_copy: bool) -> PyResult<Py<PyAny>> {
28 series_to_numpy(py, &self.series.read(), writable, allow_copy)
29 }
30
31 fn to_numpy_view(&self, py: Python) -> Option<Py<PyAny>> {
37 let (view, _) = try_series_to_numpy_view(py, &self.series.read(), true, false)?;
38 Some(view)
39 }
40}
41
42pub(super) fn series_to_numpy(
44 py: Python<'_>,
45 s: &Series,
46 writable: bool,
47 allow_copy: bool,
48) -> PyResult<Py<PyAny>> {
49 if s.is_empty() {
50 return Ok(series_to_numpy_with_copy(py, s, true));
53 }
54 if let Some((mut arr, writable_flag)) = try_series_to_numpy_view(py, s, false, allow_copy) {
55 if writable && !writable_flag {
56 if !allow_copy {
57 return Err(PyRuntimeError::new_err(
58 "copy not allowed: cannot create a writable array without copying data",
59 ));
60 }
61 arr = arr.call_method0(py, interned::COPY.get(py))?;
62 }
63 return Ok(arr);
64 }
65
66 if !allow_copy {
67 return Err(PyRuntimeError::new_err(
68 "copy not allowed: cannot convert to a NumPy array without copying data",
69 ));
70 }
71
72 Ok(series_to_numpy_with_copy(py, s, writable))
73}
74
75fn try_series_to_numpy_view(
77 py: Python<'_>,
78 s: &Series,
79 allow_nulls: bool,
80 allow_rechunk: bool,
81) -> Option<(Py<PyAny>, bool)> {
82 if !dtype_supports_view(s.dtype()) {
83 return None;
84 }
85 if !allow_nulls && series_contains_null(s) {
86 return None;
87 }
88 let (s_owned, writable_flag) = handle_chunks(py, s, allow_rechunk)?;
89 let array = series_to_numpy_view_recursive(py, s_owned, writable_flag);
90 Some((array, writable_flag))
91}
92
93fn handle_chunks(py: Python<'_>, s: &Series, allow_rechunk: bool) -> Option<(Series, bool)> {
98 let is_chunked = s.n_chunks() > 1;
99 match (is_chunked, allow_rechunk) {
100 (true, false) => None,
101 (true, true) => Some((py.detach(|| s.rechunk()), true)),
102 (false, _) => Some((s.clone(), false)),
103 }
104}
105
106fn series_to_numpy_view_recursive(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
108 debug_assert!(s.n_chunks() == 1);
109 match s.dtype() {
110 dt if dt.is_primitive_numeric() => numeric_series_to_numpy_view(py, s, writable),
111 DataType::Datetime(_, _) | DataType::Duration(_) => {
112 temporal_series_to_numpy_view(py, s, writable)
113 },
114 DataType::Array(_, _) => array_series_to_numpy_view(py, &s, writable),
115 _ => panic!("invalid data type"),
116 }
117}
118
119fn numeric_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
121 let dims = [s.len()].into_dimension();
122 with_match_physical_numpy_polars_type!(s.dtype(), |$T| {
123 let np_dtype = <$T as PolarsNumericType>::Native::get_dtype(py);
124 let ca: &ChunkedArray<$T> = s.unpack::<$T>().unwrap();
125 let flags = if writable {
126 flags::NPY_ARRAY_FARRAY
127 } else {
128 flags::NPY_ARRAY_FARRAY_RO
129 };
130
131 let slice = ca.data_views().next().unwrap();
132
133 unsafe {
134 create_borrowed_np_array::<_>(
135 py,
136 np_dtype,
137 dims,
138 flags,
139 slice.as_ptr() as _,
140 PySeries::from(s).into_py_any(py).unwrap(), )
142 }
143 })
144}
145
146fn temporal_series_to_numpy_view(py: Python<'_>, s: Series, writable: bool) -> Py<PyAny> {
148 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
149
150 let phys = s.to_physical_repr();
151 let ca = phys.i64().unwrap();
152 let slice = ca.data_views().next().unwrap();
153 let dims = [s.len()].into_dimension();
154 let flags = if writable {
155 flags::NPY_ARRAY_FARRAY
156 } else {
157 flags::NPY_ARRAY_FARRAY_RO
158 };
159
160 unsafe {
161 create_borrowed_np_array::<_>(
162 py,
163 np_dtype,
164 dims,
165 flags,
166 slice.as_ptr() as _,
167 PySeries::from(s).into_py_any(py).unwrap(), )
169 }
170}
171
172fn array_series_to_numpy_view(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
174 let ca = s.array().unwrap();
175 let s_inner = ca.get_inner();
176 let np_array_flat = series_to_numpy_view_recursive(py, s_inner, writable);
177
178 let DataType::Array(_, width) = s.dtype() else {
180 unreachable!()
181 };
182 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
183}
184
185fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
189 use DataType::*;
190 match s.dtype() {
191 Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
192 Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
193 Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
194 Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
195 Int128 => {
196 let s = s.cast(&DataType::Float64).unwrap();
197 series_to_numpy(py, &s, writable, true).unwrap()
198 },
199 UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
200 UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
201 UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
202 UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
203 UInt128 => {
204 let s = s.cast(&DataType::Float64).unwrap();
205 series_to_numpy(py, &s, writable, true).unwrap()
206 },
207 Float16 => numeric_series_to_numpy::<Float16Type, pf16>(py, s),
208 Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
209 Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
210 Boolean => boolean_series_to_numpy(py, s),
211 Date => date_series_to_numpy(py, s),
212 Datetime(tu, _) => {
213 use numpy::datetime::{Datetime, units};
214 match tu {
215 TimeUnit::Milliseconds => {
216 temporal_series_to_numpy::<Datetime<units::Milliseconds>>(py, s)
217 },
218 TimeUnit::Microseconds => {
219 temporal_series_to_numpy::<Datetime<units::Microseconds>>(py, s)
220 },
221 TimeUnit::Nanoseconds => {
222 temporal_series_to_numpy::<Datetime<units::Nanoseconds>>(py, s)
223 },
224 }
225 },
226 Duration(tu) => {
227 use numpy::datetime::{Timedelta, units};
228 match tu {
229 TimeUnit::Milliseconds => {
230 temporal_series_to_numpy::<Timedelta<units::Milliseconds>>(py, s)
231 },
232 TimeUnit::Microseconds => {
233 temporal_series_to_numpy::<Timedelta<units::Microseconds>>(py, s)
234 },
235 TimeUnit::Nanoseconds => {
236 temporal_series_to_numpy::<Timedelta<units::Nanoseconds>>(py, s)
237 },
238 }
239 },
240 Time => {
241 let ca = s.time().unwrap();
242 let values = time_to_pyobject_iter(ca).map(|v| v.into_py_any(py).unwrap());
243 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
244 },
245 String => {
246 let ca = s.str().unwrap();
247 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
248 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
249 },
250 Binary => {
251 let ca = s.binary().unwrap();
252 let values = ca.iter().map(|s| s.into_py_any(py).unwrap());
253 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
254 },
255 Categorical(_, _) | Enum(_, _) => {
256 with_match_categorical_physical_type!(s.dtype().cat_physical().unwrap(), |$C| {
257 let ca = s.cat::<$C>().unwrap();
258 let values = ca.iter_str().map(|s| s.into_py_any(py).unwrap());
259 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
260 })
261 },
262 Decimal(_, _) => {
263 let ca = s.decimal().unwrap();
264 let values = decimal_to_pyobject_iter(py, ca)
265 .unwrap()
266 .map(|v| v.into_py_any(py).unwrap());
267 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
268 },
269 List(_) => list_series_to_numpy(py, s, writable),
270 Array(_, _) => array_series_to_numpy(py, s, writable),
271 Struct(_) => {
272 let ca = s.struct_().unwrap();
273 let df = ca.clone().unnest();
274 df_to_numpy(py, &df, IndexOrder::Fortran, writable, true).unwrap()
275 },
276 #[cfg(feature = "object")]
277 Object(_) => {
278 let ca = s
279 .as_any()
280 .downcast_ref::<ObjectChunked<ObjectValue>>()
281 .unwrap();
282 let values = ca.iter().map(|v| v.into_py_any(py).unwrap());
283 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
284 },
285 Null => {
286 let n = s.len();
287 let values = std::iter::repeat_n(f32::NAN, n);
288 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
289 },
290 Extension(_, _) => series_to_numpy_with_copy(py, s.ext().unwrap().storage(), writable),
291 Unknown(_) | BinaryOffset => unreachable!(),
292 }
293}
294
295fn numeric_series_to_numpy<T, U>(py: Python<'_>, s: &Series) -> Py<PyAny>
297where
298 T: PolarsNumericType,
299 T::Native: numpy::Element,
300 U: Float + numpy::Element,
301{
302 let ca: &ChunkedArray<T> = s.as_ref().as_ref();
303 if s.null_count() == 0 {
304 let values = ca.into_no_null_iter();
305 PyArray1::<T::Native>::from_iter(py, values)
306 .into_py_any(py)
307 .unwrap()
308 } else {
309 let mapper = |opt_v: Option<T::Native>| match opt_v {
310 Some(v) => NumCast::from(v).unwrap(),
311 None => U::nan(),
312 };
313 let values = ca.iter().map(mapper);
314 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
315 }
316}
317
318fn boolean_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
320 let ca = s.bool().unwrap();
321 if s.null_count() == 0 {
322 let values = ca.no_null_iter();
323 PyArray1::<bool>::from_iter(py, values)
324 .into_py_any(py)
325 .unwrap()
326 } else {
327 let values = ca.iter().map(|opt_v| opt_v.into_py_any(py).unwrap());
328 PyArray1::from_iter(py, values).into_py_any(py).unwrap()
329 }
330}
331
332fn date_series_to_numpy(py: Python<'_>, s: &Series) -> Py<PyAny> {
334 use numpy::datetime::{Datetime, units};
335
336 let s_phys = s.to_physical_repr();
337 let ca = s_phys.i32().unwrap();
338
339 if s.null_count() == 0 {
340 let mapper = |v: i32| (v as i64).into();
341 let values = ca.into_no_null_iter().map(mapper);
342 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
343 .into_py_any(py)
344 .unwrap()
345 } else {
346 let mapper = |opt_v: Option<i32>| {
347 match opt_v {
348 Some(v) => v as i64,
349 None => i64::MIN,
350 }
351 .into()
352 };
353 let values = ca.iter().map(mapper);
354 PyArray1::<Datetime<units::Days>>::from_iter(py, values)
355 .into_py_any(py)
356 .unwrap()
357 }
358}
359
360fn temporal_series_to_numpy<T>(py: Python<'_>, s: &Series) -> Py<PyAny>
362where
363 T: From<i64> + numpy::Element,
364{
365 let s_phys = s.to_physical_repr();
366 let ca = s_phys.i64().unwrap();
367 let values = ca.iter().map(|v| v.unwrap_or(i64::MIN).into());
368 PyArray1::<T>::from_iter(py, values)
369 .into_py_any(py)
370 .unwrap()
371}
372fn list_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
373 let ca = s.list().unwrap();
374
375 let iter = ca.amortized_iter().map(|opt_s| match opt_s {
376 None => py.None(),
377 Some(s) => series_to_numpy(py, s.as_ref(), writable, true).unwrap(),
378 });
379 PyArray1::from_iter(py, iter).into_py_any(py).unwrap()
380}
381
382fn array_series_to_numpy(py: Python<'_>, s: &Series, writable: bool) -> Py<PyAny> {
384 let ca = s.array().unwrap();
385 let s_inner = ca.get_inner();
386 let np_array_flat = series_to_numpy_with_copy(py, &s_inner, writable);
387
388 let DataType::Array(_, width) = s.dtype() else {
390 unreachable!()
391 };
392 reshape_numpy_array(py, np_array_flat, ca.len(), *width).unwrap()
393}