polars_python/dataframe/
export.rs1use arrow::datatypes::IntegerType;
2use arrow::record_batch::RecordBatch;
3use polars::prelude::*;
4use polars_compute::cast::CastOptionsImpl;
5use pyo3::IntoPyObjectExt;
6use pyo3::prelude::*;
7use pyo3::types::{PyCapsule, PyList, PyTuple};
8
9use super::PyDataFrame;
10use crate::conversion::{ObjectValue, Wrap};
11use crate::error::PyPolarsErr;
12use crate::interop;
13use crate::interop::arrow::to_py::dataframe_to_stream;
14use crate::prelude::PyCompatLevel;
15use crate::utils::EnterPolarsExt;
16
17#[pymethods]
18impl PyDataFrame {
19 #[cfg(feature = "object")]
20 pub fn row_tuple<'py>(&self, idx: i64, py: Python<'py>) -> PyResult<Bound<'py, PyTuple>> {
21 let idx = if idx < 0 {
22 (self.df.height() as i64 + idx) as usize
23 } else {
24 idx as usize
25 };
26 if idx >= self.df.height() {
27 return Err(PyPolarsErr::from(polars_err!(oob = idx, self.df.height())).into());
28 }
29 PyTuple::new(
30 py,
31 self.df.get_columns().iter().map(|s| match s.dtype() {
32 DataType::Object(_) => {
33 let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
34 obj.into_py_any(py).unwrap()
35 },
36 _ => Wrap(s.get(idx).unwrap()).into_py_any(py).unwrap(),
37 }),
38 )
39 }
40
41 #[cfg(feature = "object")]
42 pub fn row_tuples<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyList>> {
43 let mut rechunked;
44 let df = if self.df.max_n_chunks() > 16 {
47 rechunked = self.df.clone();
48 rechunked.as_single_chunk_par();
49 &rechunked
50 } else {
51 &self.df
52 };
53 PyList::new(
54 py,
55 (0..df.height()).map(|idx| {
56 PyTuple::new(
57 py,
58 df.get_columns().iter().map(|c| match c.dtype() {
59 DataType::Null => py.None(),
60 DataType::Object(_) => {
61 let obj: Option<&ObjectValue> = c.get_object(idx).map(|any| any.into());
62 obj.into_py_any(py).unwrap()
63 },
64 _ => {
65 let av = unsafe { c.get_unchecked(idx) };
67 Wrap(av).into_py_any(py).unwrap()
68 },
69 }),
70 )
71 .unwrap()
72 }),
73 )
74 }
75
76 #[allow(clippy::wrong_self_convention)]
77 pub fn to_arrow(
78 &mut self,
79 py: Python<'_>,
80 compat_level: PyCompatLevel,
81 ) -> PyResult<Vec<PyObject>> {
82 py.enter_polars_ok(|| self.df.align_chunks_par())?;
83 let pyarrow = py.import("pyarrow")?;
84
85 let rbs = self
86 .df
87 .iter_chunks(compat_level.0, true)
88 .map(|rb| interop::arrow::to_py::to_py_rb(&rb, py, &pyarrow))
89 .collect::<PyResult<_>>()?;
90 Ok(rbs)
91 }
92
93 #[allow(clippy::wrong_self_convention)]
99 pub fn to_pandas(&mut self, py: Python) -> PyResult<Vec<PyObject>> {
100 py.enter_polars_ok(|| self.df.as_single_chunk_par())?;
101 Python::with_gil(|py| {
102 let pyarrow = py.import("pyarrow")?;
103 let cat_columns = self
104 .df
105 .get_columns()
106 .iter()
107 .enumerate()
108 .filter(|(_i, s)| {
109 matches!(
110 s.dtype(),
111 DataType::Categorical(_, _) | DataType::Enum(_, _)
112 )
113 })
114 .map(|(i, _)| i)
115 .collect::<Vec<_>>();
116
117 let enum_and_categorical_dtype = ArrowDataType::Dictionary(
118 IntegerType::Int64,
119 Box::new(ArrowDataType::LargeUtf8),
120 false,
121 );
122
123 let mut replaced_schema = None;
124 let rbs = self
125 .df
126 .iter_chunks(CompatLevel::oldest(), true)
127 .map(|rb| {
128 let length = rb.len();
129 let (schema, mut arrays) = rb.into_schema_and_arrays();
130
131 replaced_schema =
133 (replaced_schema.is_none() && !cat_columns.is_empty()).then(|| {
134 let mut schema = schema.as_ref().clone();
135 for i in &cat_columns {
136 let (_, field) = schema.get_at_index_mut(*i).unwrap();
137 field.dtype = enum_and_categorical_dtype.clone();
138 }
139 Arc::new(schema)
140 });
141
142 for i in &cat_columns {
143 let arr = arrays.get_mut(*i).unwrap();
144 let out = polars_compute::cast::cast(
145 &**arr,
146 &enum_and_categorical_dtype,
147 CastOptionsImpl::default(),
148 )
149 .unwrap();
150 *arr = out;
151 }
152 let schema = replaced_schema
153 .as_ref()
154 .map_or(schema, |replaced| replaced.clone());
155 let rb = RecordBatch::new(length, schema, arrays);
156
157 interop::arrow::to_py::to_py_rb(&rb, py, &pyarrow)
158 })
159 .collect::<PyResult<_>>()?;
160 Ok(rbs)
161 })
162 }
163
164 #[allow(unused_variables)]
165 #[pyo3(signature = (requested_schema=None))]
166 fn __arrow_c_stream__<'py>(
167 &mut self,
168 py: Python<'py>,
169 requested_schema: Option<PyObject>,
170 ) -> PyResult<Bound<'py, PyCapsule>> {
171 py.enter_polars_ok(|| self.df.align_chunks_par())?;
172 dataframe_to_stream(&self.df, py)
173 }
174}