polars_python/interop/numpy/
to_numpy_df.rs1use ndarray::IntoDimension;
2use numpy::npyffi::flags;
3use numpy::{Element, IntoPyArray, PyArray1};
4use polars_core::prelude::*;
5use polars_core::utils::dtypes_to_supertype;
6use polars_core::with_match_physical_numeric_polars_type;
7use pyo3::exceptions::PyRuntimeError;
8use pyo3::prelude::*;
9use pyo3::types::{PyList, PyTuple};
10use pyo3::{IntoPyObjectExt, intern};
11
12use super::to_numpy_series::series_to_numpy;
13use super::utils::{
14 create_borrowed_np_array, dtype_supports_view, polars_dtype_to_np_temporal_dtype,
15};
16use crate::conversion::Wrap;
17use crate::dataframe::PyDataFrame;
18
19#[pymethods]
20impl PyDataFrame {
21 fn to_numpy(
23 &self,
24 py: Python<'_>,
25 order: Wrap<IndexOrder>,
26 writable: bool,
27 allow_copy: bool,
28 ) -> PyResult<Py<PyAny>> {
29 df_to_numpy(py, &self.df.read(), order.0, writable, allow_copy)
30 }
31}
32
33pub(super) fn df_to_numpy(
34 py: Python<'_>,
35 df: &DataFrame,
36 order: IndexOrder,
37 writable: bool,
38 allow_copy: bool,
39) -> PyResult<Py<PyAny>> {
40 if df.shape_has_zero() {
41 if df.width() == 0 {
42 let shape = PyTuple::new(py, [df.height(), df.width()])?;
43 let numpy = super::utils::get_numpy_module(py)?;
44
45 return Ok(numpy
46 .call_method1(
47 intern!(py, "zeros"),
48 (shape, numpy.getattr(intern!(py, "int8"))?),
49 )?
50 .unbind());
51 }
52 return df_to_numpy_with_copy(py, df, order, true);
55 }
56
57 if matches!(order, IndexOrder::Fortran) {
58 if let Some(mut arr) = try_df_to_numpy_view(py, df, false) {
59 if writable {
60 if !allow_copy {
61 return Err(PyRuntimeError::new_err(
62 "copy not allowed: cannot create a writable array without copying data",
63 ));
64 }
65 arr = arr.call_method0(py, intern!(py, "copy"))?;
66 }
67 return Ok(arr);
68 }
69 }
70
71 if !allow_copy {
72 return Err(PyRuntimeError::new_err(
73 "copy not allowed: cannot convert to a NumPy array without copying data",
74 ));
75 }
76
77 df_to_numpy_with_copy(py, df, order, writable)
78}
79
80fn try_df_to_numpy_view(py: Python<'_>, df: &DataFrame, allow_nulls: bool) -> Option<Py<PyAny>> {
82 let first_dtype = check_df_dtypes_support_view(df)?;
83
84 if !allow_nulls && df.columns().iter().any(|s| s.null_count() > 0) {
86 return None;
87 }
88 if !check_df_columns_contiguous(df) {
89 return None;
90 }
91
92 let owner = PyDataFrame::from(df.clone()).into_py_any(py).ok()?; let arr = match first_dtype {
95 dt if dt.is_primitive_numeric() => {
96 with_match_physical_numpy_polars_type!(first_dtype, |$T| {
97 numeric_df_to_numpy_view::<$T>(py, df, owner)
98 })
99 },
100 DataType::Datetime(_, _) | DataType::Duration(_) => {
101 temporal_df_to_numpy_view(py, df, owner)
102 },
103 _ => unreachable!(),
104 };
105 Some(arr)
106}
107fn check_df_dtypes_support_view(df: &DataFrame) -> Option<&DataType> {
111 let columns = df.columns();
112 let first_dtype = columns.first()?.dtype();
113
114 if first_dtype.is_array() || !dtype_supports_view(first_dtype) {
116 return None;
117 }
118 if columns.iter().any(|s| s.dtype() != first_dtype) {
119 return None;
120 }
121 Some(first_dtype)
122}
123fn check_df_columns_contiguous(df: &DataFrame) -> bool {
125 let columns = df.columns();
126
127 if columns
128 .iter()
129 .any(|s| s.as_materialized_series().n_chunks() > 1)
130 {
131 return false;
132 }
133 if columns.len() <= 1 {
134 return true;
135 }
136
137 match columns.first().unwrap().dtype() {
138 dt if dt.is_primitive_numeric() => {
139 with_match_physical_numeric_polars_type!(dt, |$T| {
140 let slices = columns
141 .iter()
142 .map(|s| {
143 let ca: &ChunkedArray<$T> = s.as_materialized_series().unpack().unwrap();
144 ca.data_views().next().unwrap()
145 })
146 .collect::<Vec<_>>();
147
148 check_slices_contiguous::<$T>(slices)
149 })
150 },
151 DataType::Datetime(_, _) | DataType::Duration(_) => {
152 let phys: Vec<_> = columns.iter().map(|s| s.to_physical_repr()).collect();
153 let slices = phys
154 .iter()
155 .map(|s| {
156 let ca = s.i64().unwrap();
157 ca.data_views().next().unwrap()
158 })
159 .collect::<Vec<_>>();
160
161 check_slices_contiguous::<Int64Type>(slices)
162 },
163 _ => panic!("invalid data type"),
164 }
165}
166fn check_slices_contiguous<T>(slices: Vec<&[T::Native]>) -> bool
168where
169 T: PolarsNumericType,
170{
171 let first_slice = slices.first().unwrap();
172
173 let mut end_ptr = unsafe { first_slice.as_ptr().add(first_slice.len()) };
175 slices[1..].iter().all(|slice| {
176 let slice_ptr = slice.as_ptr();
177 let valid = std::ptr::eq(slice_ptr, end_ptr);
178
179 end_ptr = unsafe { slice_ptr.add(slice.len()) };
180
181 valid
182 })
183}
184
185fn numeric_df_to_numpy_view<T>(py: Python<'_>, df: &DataFrame, owner: Py<PyAny>) -> Py<PyAny>
187where
188 T: PolarsNumericType,
189 T::Native: Element,
190{
191 let ca: &ChunkedArray<T> = df
192 .columns()
193 .first()
194 .unwrap()
195 .as_materialized_series()
196 .unpack()
197 .unwrap();
198 let first_slice = ca.data_views().next().unwrap();
199
200 let start_ptr = first_slice.as_ptr();
201 let np_dtype = T::Native::get_dtype(py);
202 let dims = [first_slice.len(), df.width()].into_dimension();
203
204 unsafe {
205 create_borrowed_np_array::<_>(
206 py,
207 np_dtype,
208 dims,
209 flags::NPY_ARRAY_FARRAY_RO,
210 start_ptr as _,
211 owner,
212 )
213 }
214}
215fn temporal_df_to_numpy_view(py: Python<'_>, df: &DataFrame, owner: Py<PyAny>) -> Py<PyAny> {
217 let s = df.columns().first().unwrap();
218 let phys = s.to_physical_repr();
219 let ca = phys.i64().unwrap();
220 let first_slice = ca.data_views().next().unwrap();
221
222 let start_ptr = first_slice.as_ptr();
223 let np_dtype = polars_dtype_to_np_temporal_dtype(py, s.dtype());
224 let dims = [first_slice.len(), df.width()].into_dimension();
225
226 unsafe {
227 create_borrowed_np_array::<_>(
228 py,
229 np_dtype,
230 dims,
231 flags::NPY_ARRAY_FARRAY_RO,
232 start_ptr as _,
233 owner,
234 )
235 }
236}
237
238fn df_to_numpy_with_copy(
239 py: Python<'_>,
240 df: &DataFrame,
241 order: IndexOrder,
242 writable: bool,
243) -> PyResult<Py<PyAny>> {
244 if let Some(arr) = try_df_to_numpy_numeric_supertype(py, df, order) {
245 Ok(arr)
246 } else {
247 df_columns_to_numpy(py, df, order, writable)
248 }
249}
250fn try_df_to_numpy_numeric_supertype(
251 py: Python<'_>,
252 df: &DataFrame,
253 order: IndexOrder,
254) -> Option<Py<PyAny>> {
255 let st = dtypes_to_supertype(df.columns().iter().map(|s| s.dtype())).ok()?;
256
257 let np_array = match st {
258 dt if dt.is_primitive_numeric() => with_match_physical_numpy_polars_type!(dt, |$T| {
259 df.to_ndarray::<$T>(order).ok()?.into_pyarray(py).into_py_any(py).ok()?
260 }),
261 _ => return None,
262 };
263 Some(np_array)
264}
265
266fn df_columns_to_numpy(
267 py: Python<'_>,
268 df: &DataFrame,
269 order: IndexOrder,
270 writable: bool,
271) -> PyResult<Py<PyAny>> {
272 let np_arrays = df.columns().iter().map(|c| {
273 let mut arr = series_to_numpy(py, c.as_materialized_series(), writable, true).unwrap();
274
275 let shape: Vec<usize> = arr
277 .getattr(py, intern!(py, "shape"))
278 .unwrap()
279 .extract(py)
280 .unwrap();
281 if shape.len() > 1 {
282 let subarrays = (0..shape[0]).map(|idx| {
284 arr.call_method1(py, intern!(py, "__getitem__"), (idx,))
285 .unwrap()
286 });
287 arr = PyArray1::from_iter(py, subarrays).into_py_any(py).unwrap();
288 }
289 arr
290 });
291
292 let numpy = super::utils::get_numpy_module(py)?;
293 let np_array = match order {
294 IndexOrder::C => numpy
295 .getattr(intern!(py, "column_stack"))?
296 .call1((PyList::new(py, np_arrays)?,))?,
297 IndexOrder::Fortran => numpy
298 .getattr(intern!(py, "vstack"))?
299 .call1((PyList::new(py, np_arrays)?,))?
300 .getattr(intern!(py, "T"))?,
301 };
302
303 Ok(np_array.into())
304}