polars_python/interop/numpy/
to_numpy_df.rs1use ndarray::IntoDimension;
2use numpy::npyffi::flags;
3use numpy::{Element, IntoPyArray, PyArray1};
4use polars_core::prelude::*;
5use polars_core::utils::dtypes_to_supertype;
6use polars_core::with_match_physical_numeric_polars_type;
7use pyo3::exceptions::PyRuntimeError;
8use pyo3::prelude::*;
9use pyo3::types::PyList;
10use pyo3::{IntoPyObjectExt, intern};
11
12use super::to_numpy_series::series_to_numpy;
13use super::utils::{
14 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
15};
16use crate::conversion::Wrap;
17use crate::dataframe::PyDataFrame;
18
19#[pymethods]
20impl PyDataFrame {
21 fn to_numpy(
23 &self,
24 py: Python<'_>,
25 order: Wrap<IndexOrder>,
26 writable: bool,
27 allow_copy: bool,
28 ) -> PyResult<PyObject> {
29 df_to_numpy(py, &self.df, order.0, writable, allow_copy)
30 }
31}
32
33pub(super) fn df_to_numpy(
34 py: Python<'_>,
35 df: &DataFrame,
36 order: IndexOrder,
37 writable: bool,
38 allow_copy: bool,
39) -> PyResult<PyObject> {
40 if df.is_empty() {
41 return df_to_numpy_with_copy(py, df, order, true);
44 }
45
46 if matches!(order, IndexOrder::Fortran) {
47 if let Some(mut arr) = try_df_to_numpy_view(py, df, false) {
48 if writable {
49 if !allow_copy {
50 return Err(PyRuntimeError::new_err(
51 "copy not allowed: cannot create a writable array without copying data",
52 ));
53 }
54 arr = arr.call_method0(py, intern!(py, "copy"))?;
55 }
56 return Ok(arr);
57 }
58 }
59
60 if !allow_copy {
61 return Err(PyRuntimeError::new_err(
62 "copy not allowed: cannot convert to a NumPy array without copying data",
63 ));
64 }
65
66 df_to_numpy_with_copy(py, df, order, writable)
67}
68
69fn try_df_to_numpy_view(py: Python<'_>, df: &DataFrame, allow_nulls: bool) -> Option<PyObject> {
71 let first_dtype = check_df_dtypes_support_view(df)?;
72
73 if !allow_nulls && df.get_columns().iter().any(|s| s.null_count() > 0) {
75 return None;
76 }
77 if !check_df_columns_contiguous(df) {
78 return None;
79 }
80
81 let owner = PyDataFrame::from(df.clone()).into_py_any(py).ok()?; let arr = match first_dtype {
84 dt if dt.is_primitive_numeric() => {
85 with_match_physical_numpy_polars_type!(first_dtype, |$T| {
86 numeric_df_to_numpy_view::<$T>(py, df, owner)
87 })
88 },
89 DataType::Datetime(_, _) | DataType::Duration(_) => {
90 temporal_df_to_numpy_view(py, df, owner)
91 },
92 _ => unreachable!(),
93 };
94 Some(arr)
95}
96fn check_df_dtypes_support_view(df: &DataFrame) -> Option<&DataType> {
100 let columns = df.get_columns();
101 let first_dtype = columns.first()?.dtype();
102
103 if first_dtype.is_array() || !dtype_supports_view(first_dtype) {
105 return None;
106 }
107 if columns.iter().any(|s| s.dtype() != first_dtype) {
108 return None;
109 }
110 Some(first_dtype)
111}
112fn check_df_columns_contiguous(df: &DataFrame) -> bool {
114 let columns = df.get_columns();
115
116 if columns
117 .iter()
118 .any(|s| s.as_materialized_series().n_chunks() > 1)
119 {
120 return false;
121 }
122 if columns.len() <= 1 {
123 return true;
124 }
125
126 match columns.first().unwrap().dtype() {
127 dt if dt.is_primitive_numeric() => {
128 with_match_physical_numeric_polars_type!(dt, |$T| {
129 let slices = columns
130 .iter()
131 .map(|s| {
132 let ca: &ChunkedArray<$T> = s.as_materialized_series().unpack().unwrap();
133 ca.data_views().next().unwrap()
134 })
135 .collect::<Vec<_>>();
136
137 check_slices_contiguous::<$T>(slices)
138 })
139 },
140 DataType::Datetime(_, _) | DataType::Duration(_) => {
141 let phys: Vec<_> = columns.iter().map(|s| s.to_physical_repr()).collect();
142 let slices = phys
143 .iter()
144 .map(|s| {
145 let ca = s.i64().unwrap();
146 ca.data_views().next().unwrap()
147 })
148 .collect::<Vec<_>>();
149
150 check_slices_contiguous::<Int64Type>(slices)
151 },
152 _ => panic!("invalid data type"),
153 }
154}
155fn check_slices_contiguous<T>(slices: Vec<&[T::Native]>) -> bool
157where
158 T: PolarsNumericType,
159{
160 let first_slice = slices.first().unwrap();
161
162 let mut end_ptr = unsafe { first_slice.as_ptr().add(first_slice.len()) };
164 slices[1..].iter().all(|slice| {
165 let slice_ptr = slice.as_ptr();
166 let valid = std::ptr::eq(slice_ptr, end_ptr);
167
168 end_ptr = unsafe { slice_ptr.add(slice.len()) };
169
170 valid
171 })
172}
173
174fn numeric_df_to_numpy_view<T>(py: Python<'_>, df: &DataFrame, owner: PyObject) -> PyObject
176where
177 T: PolarsNumericType,
178 T::Native: Element,
179{
180 let ca: &ChunkedArray<T> = df
181 .get_columns()
182 .first()
183 .unwrap()
184 .as_materialized_series()
185 .unpack()
186 .unwrap();
187 let first_slice = ca.data_views().next().unwrap();
188
189 let start_ptr = first_slice.as_ptr();
190 let np_dtype = T::Native::get_dtype(py);
191 let dims = [first_slice.len(), df.width()].into_dimension();
192
193 unsafe {
194 create_borrowed_np_array::<_>(
195 py,
196 np_dtype,
197 dims,
198 flags::NPY_ARRAY_FARRAY_RO,
199 start_ptr as _,
200 owner,
201 )
202 }
203}
204fn temporal_df_to_numpy_view(py: Python<'_>, df: &DataFrame, owner: PyObject) -> PyObject {
206 let s = df.get_columns().first().unwrap();
207 let phys = s.to_physical_repr();
208 let ca = phys.i64().unwrap();
209 let first_slice = ca.data_views().next().unwrap();
210
211 let start_ptr = first_slice.as_ptr();
212 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
213 let dims = [first_slice.len(), df.width()].into_dimension();
214
215 unsafe {
216 create_borrowed_np_array::<_>(
217 py,
218 np_dtype,
219 dims,
220 flags::NPY_ARRAY_FARRAY_RO,
221 start_ptr as _,
222 owner,
223 )
224 }
225}
226
227fn df_to_numpy_with_copy(
228 py: Python<'_>,
229 df: &DataFrame,
230 order: IndexOrder,
231 writable: bool,
232) -> PyResult<PyObject> {
233 if let Some(arr) = try_df_to_numpy_numeric_supertype(py, df, order) {
234 Ok(arr)
235 } else {
236 df_columns_to_numpy(py, df, order, writable)
237 }
238}
239fn try_df_to_numpy_numeric_supertype(
240 py: Python<'_>,
241 df: &DataFrame,
242 order: IndexOrder,
243) -> Option<PyObject> {
244 let st = dtypes_to_supertype(df.iter().map(|s| s.dtype())).ok()?;
245
246 let np_array = match st {
247 dt if dt.is_primitive_numeric() => with_match_physical_numpy_polars_type!(dt, |$T| {
248 df.to_ndarray::<$T>(order).ok()?.into_pyarray(py).into_py_any(py).ok()?
249 }),
250 _ => return None,
251 };
252 Some(np_array)
253}
254
255fn df_columns_to_numpy(
256 py: Python<'_>,
257 df: &DataFrame,
258 order: IndexOrder,
259 writable: bool,
260) -> PyResult<PyObject> {
261 let np_arrays = df.iter().map(|s| {
262 let mut arr = series_to_numpy(py, s, writable, true).unwrap();
263
264 let shape: Vec<usize> = arr
266 .getattr(py, intern!(py, "shape"))
267 .unwrap()
268 .extract(py)
269 .unwrap();
270 if shape.len() > 1 {
271 let subarrays = (0..shape[0]).map(|idx| {
273 arr.call_method1(py, intern!(py, "__getitem__"), (idx,))
274 .unwrap()
275 });
276 arr = PyArray1::from_iter(py, subarrays).into_py_any(py).unwrap();
277 }
278 arr
279 });
280
281 let numpy = PyModule::import(py, intern!(py, "numpy"))?;
282 let np_array = match order {
283 IndexOrder::C => numpy
284 .getattr(intern!(py, "column_stack"))?
285 .call1((PyList::new(py, np_arrays)?,))?,
286 IndexOrder::Fortran => numpy
287 .getattr(intern!(py, "vstack"))?
288 .call1((PyList::new(py, np_arrays)?,))?
289 .getattr(intern!(py, "T"))?,
290 };
291
292 Ok(np_array.into())
293}