polars_python/interop/numpy/
to_numpy_df.rs1use ndarray::IntoDimension;
2use numpy::npyffi::flags;
3use numpy::{Element, IntoPyArray, PyArray1};
4use polars_core::prelude::*;
5use polars_core::utils::dtypes_to_supertype;
6use polars_core::with_match_physical_numeric_polars_type;
7use pyo3::exceptions::PyRuntimeError;
8use pyo3::prelude::*;
9use pyo3::types::{PyList, PyTuple};
10use pyo3::{IntoPyObjectExt, intern};
11
12use super::to_numpy_series::series_to_numpy;
13use super::utils::{
14 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
15};
16use crate::conversion::Wrap;
17use crate::dataframe::PyDataFrame;
18use crate::interned;
19use crate::utils::EnterPolarsExt;
20
21#[pymethods]
22impl PyDataFrame {
23 fn to_numpy(
25 &self,
26 py: Python<'_>,
27 order: Wrap<IndexOrder>,
28 writable: bool,
29 allow_copy: bool,
30 ) -> PyResult<Py<PyAny>> {
31 df_to_numpy(py, &self.df.read(), order.0, writable, allow_copy)
32 }
33}
34
35pub(super) fn df_to_numpy(
36 py: Python<'_>,
37 df: &DataFrame,
38 order: IndexOrder,
39 writable: bool,
40 allow_copy: bool,
41) -> PyResult<Py<PyAny>> {
42 if df.shape_has_zero() {
43 if df.width() == 0 {
44 let shape = PyTuple::new(py, [df.height(), df.width()])?;
45 let numpy = super::utils::get_numpy_module(py)?;
46
47 return Ok(numpy
48 .call_method1(
49 intern!(py, "zeros"),
50 (shape, numpy.getattr(intern!(py, "int8"))?),
51 )?
52 .unbind());
53 }
54 return df_to_numpy_with_copy(py, df, order, true);
57 }
58
59 if matches!(order, IndexOrder::Fortran) {
60 if let Some(mut arr) = try_df_to_numpy_view(py, df, false) {
61 if writable {
62 if !allow_copy {
63 return Err(PyRuntimeError::new_err(
64 "copy not allowed: cannot create a writable array without copying data",
65 ));
66 }
67 arr = arr.call_method0(py, interned::COPY.get(py))?;
68 }
69 return Ok(arr);
70 }
71 }
72
73 if !allow_copy {
74 return Err(PyRuntimeError::new_err(
75 "copy not allowed: cannot convert to a NumPy array without copying data",
76 ));
77 }
78
79 df_to_numpy_with_copy(py, df, order, writable)
80}
81
82fn try_df_to_numpy_view(py: Python<'_>, df: &DataFrame, allow_nulls: bool) -> Option<Py<PyAny>> {
84 let first_dtype = check_df_dtypes_support_view(df)?;
85
86 if !allow_nulls && df.columns().iter().any(|s| s.null_count() > 0) {
88 return None;
89 }
90 if !check_df_columns_contiguous(df) {
91 return None;
92 }
93
94 let owner = PyDataFrame::from(df.clone()).into_py_any(py).ok()?; let arr = match first_dtype {
97 dt if dt.is_primitive_numeric() => {
98 with_match_physical_numpy_polars_type!(first_dtype, |$T| {
99 numeric_df_to_numpy_view::<$T>(py, df, owner)
100 })
101 },
102 DataType::Datetime(_, _) | DataType::Duration(_) => {
103 temporal_df_to_numpy_view(py, df, owner)
104 },
105 _ => unreachable!(),
106 };
107 Some(arr)
108}
109fn check_df_dtypes_support_view(df: &DataFrame) -> Option<&DataType> {
113 let columns = df.columns();
114 let first_dtype = columns.first()?.dtype();
115
116 if first_dtype.is_array() || !dtype_supports_view(first_dtype) {
118 return None;
119 }
120 if columns.iter().any(|s| s.dtype() != first_dtype) {
121 return None;
122 }
123 Some(first_dtype)
124}
125fn check_df_columns_contiguous(df: &DataFrame) -> bool {
127 let columns = df.columns();
128
129 if columns
130 .iter()
131 .any(|s| s.as_materialized_series().n_chunks() > 1)
132 {
133 return false;
134 }
135 if columns.len() <= 1 {
136 return true;
137 }
138
139 match columns.first().unwrap().dtype() {
140 dt if dt.is_primitive_numeric() => {
141 with_match_physical_numeric_polars_type!(dt, |$T| {
142 let slices = columns
143 .iter()
144 .map(|s| {
145 let ca: &ChunkedArray<$T> = s.as_materialized_series().unpack().unwrap();
146 ca.data_views().next().unwrap()
147 })
148 .collect::<Vec<_>>();
149
150 check_slices_contiguous::<$T>(slices)
151 })
152 },
153 DataType::Datetime(_, _) | DataType::Duration(_) => {
154 let phys: Vec<_> = columns.iter().map(|s| s.to_physical_repr()).collect();
155 let slices = phys
156 .iter()
157 .map(|s| {
158 let ca = s.i64().unwrap();
159 ca.data_views().next().unwrap()
160 })
161 .collect::<Vec<_>>();
162
163 check_slices_contiguous::<Int64Type>(slices)
164 },
165 _ => panic!("invalid data type"),
166 }
167}
168fn check_slices_contiguous<T>(slices: Vec<&[T::Native]>) -> bool
170where
171 T: PolarsNumericType,
172{
173 let first_slice = slices.first().unwrap();
174
175 let mut end_ptr = unsafe { first_slice.as_ptr().add(first_slice.len()) };
177 slices[1..].iter().all(|slice| {
178 let slice_ptr = slice.as_ptr();
179 let valid = std::ptr::eq(slice_ptr, end_ptr);
180
181 end_ptr = unsafe { slice_ptr.add(slice.len()) };
182
183 valid
184 })
185}
186
187fn numeric_df_to_numpy_view<T>(py: Python<'_>, df: &DataFrame, owner: Py<PyAny>) -> Py<PyAny>
189where
190 T: PolarsNumericType,
191 T::Native: Element,
192{
193 let ca: &ChunkedArray<T> = df
194 .columns()
195 .first()
196 .unwrap()
197 .as_materialized_series()
198 .unpack()
199 .unwrap();
200 let first_slice = ca.data_views().next().unwrap();
201
202 let start_ptr = first_slice.as_ptr();
203 let np_dtype = T::Native::get_dtype(py);
204 let dims = [first_slice.len(), df.width()].into_dimension();
205
206 unsafe {
207 create_borrowed_np_array::<_>(
208 py,
209 np_dtype,
210 dims,
211 flags::NPY_ARRAY_FARRAY_RO,
212 start_ptr as _,
213 owner,
214 )
215 }
216}
217fn temporal_df_to_numpy_view(py: Python<'_>, df: &DataFrame, owner: Py<PyAny>) -> Py<PyAny> {
219 let s = df.columns().first().unwrap();
220 let phys = s.to_physical_repr();
221 let ca = phys.i64().unwrap();
222 let first_slice = ca.data_views().next().unwrap();
223
224 let start_ptr = first_slice.as_ptr();
225 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
226 let dims = [first_slice.len(), df.width()].into_dimension();
227
228 unsafe {
229 create_borrowed_np_array::<_>(
230 py,
231 np_dtype,
232 dims,
233 flags::NPY_ARRAY_FARRAY_RO,
234 start_ptr as _,
235 owner,
236 )
237 }
238}
239
240fn df_to_numpy_with_copy(
241 py: Python<'_>,
242 df: &DataFrame,
243 order: IndexOrder,
244 writable: bool,
245) -> PyResult<Py<PyAny>> {
246 if let Some(arr) = try_df_to_numpy_numeric_supertype(py, df, order) {
247 Ok(arr)
248 } else {
249 df_columns_to_numpy(py, df, order, writable)
250 }
251}
252fn try_df_to_numpy_numeric_supertype(
253 py: Python<'_>,
254 df: &DataFrame,
255 order: IndexOrder,
256) -> Option<Py<PyAny>> {
257 let st = dtypes_to_supertype(df.columns().iter().map(|s| s.dtype())).ok()?;
258
259 let np_array = match st {
260 dt if dt.is_primitive_numeric() => with_match_physical_numpy_polars_type!(dt, |$T| {
261 let arr = py.enter_polars(|| df.to_ndarray::<$T>(order)).ok()?;
262 arr.into_pyarray(py).into_py_any(py).ok()?
263 }),
264 _ => return None,
265 };
266 Some(np_array)
267}
268
269fn df_columns_to_numpy(
270 py: Python<'_>,
271 df: &DataFrame,
272 order: IndexOrder,
273 writable: bool,
274) -> PyResult<Py<PyAny>> {
275 let np_arrays = df.columns().iter().map(|c| {
276 let mut arr = series_to_numpy(py, c.as_materialized_series(), writable, true).unwrap();
277
278 let shape: Vec<usize> = arr
280 .getattr(py, interned::SHAPE.get(py))
281 .unwrap()
282 .extract(py)
283 .unwrap();
284 if shape.len() > 1 {
285 let subarrays = (0..shape[0]).map(|idx| {
287 arr.call_method1(py, interned::DUNDER_GETITEM.get(py), (idx,))
288 .unwrap()
289 });
290 arr = PyArray1::from_iter(py, subarrays).into_py_any(py).unwrap();
291 }
292 arr
293 });
294
295 let numpy = super::utils::get_numpy_module(py)?;
296 let np_array = match order {
297 IndexOrder::C => numpy
298 .getattr(intern!(py, "column_stack"))?
299 .call1((PyList::new(py, np_arrays)?,))?,
300 IndexOrder::Fortran => numpy
301 .getattr(intern!(py, "vstack"))?
302 .call1((PyList::new(py, np_arrays)?,))?
303 .getattr(intern!(py, "T"))?,
304 };
305
306 Ok(np_array.into())
307}