1use std::convert::Infallible;
2use std::mem::ManuallyDrop;
3
4use polars::prelude::*;
5use polars_core::datatypes::DataType;
6use polars_core::utils::materialize_dyn_int;
7use polars_ffi::version_0::{export_series, import_series, SeriesExport};
8#[cfg(feature = "lazy")]
9use polars_lazy::frame::LazyFrame;
10#[cfg(feature = "lazy")]
11use polars_plan::dsl::DslPlan;
12#[cfg(feature = "lazy")]
13use polars_plan::dsl::Expr;
14#[cfg(feature = "lazy")]
15use polars_utils::pl_serialize;
16use pyo3::exceptions::{PyTypeError, PyValueError};
17use pyo3::intern;
18use pyo3::prelude::*;
19use pyo3::pybacked::PyBackedStr;
20#[cfg(feature = "lazy")]
21use pyo3::types::PyBytes;
22#[cfg(feature = "dtype-struct")]
23use pyo3::types::PyList;
24use pyo3::types::{PyDict, PyString};
25
26use super::*;
27use crate::error::PyPolarsErr;
28
/// Reads the `_s` attribute of `obj` and converts it into a Rust [`Series`].
///
/// Any Python error raised while reading the attribute or while extracting the
/// [`PySeries`] is returned to the caller.
///
/// NOTE(review): [`PySeries`] extraction itself reads `_s` from the object it
/// is handed, so this looks up `_s` two levels deep — confirm this is intended.
#[cfg(feature = "dtype-categorical")]
pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
    let py = obj.py();
    let inner = obj.getattr(intern!(py, "_s"))?;
    let PySeries(series) = inner.extract::<PySeries>()?;
    Ok(series)
}
34
35#[repr(transparent)]
36#[derive(Debug, Clone)]
37pub struct PySeries(pub Series);
39
40#[repr(transparent)]
41#[derive(Debug, Clone)]
42pub struct PyDataFrame(pub DataFrame);
44
/// Newtype around a polars [`LazyFrame`] used to move lazy queries across the
/// Rust/Python boundary.
///
/// Only available when the `lazy` feature is enabled.
#[cfg(feature = "lazy")]
#[derive(Clone)]
#[repr(transparent)]
pub struct PyLazyFrame(pub LazyFrame);
56
/// Newtype around a polars [`Expr`] used to move expressions across the
/// Rust/Python boundary.
///
/// Only available when the `lazy` feature is enabled.
#[cfg(feature = "lazy")]
#[derive(Clone)]
#[repr(transparent)]
pub struct PyExpr(pub Expr);
61
62#[repr(transparent)]
63#[derive(Clone)]
64pub struct PySchema(pub SchemaRef);
65
66#[repr(transparent)]
67#[derive(Clone)]
68pub struct PyDataType(pub DataType);
69
70#[repr(transparent)]
71#[derive(Clone, Copy)]
72pub struct PyTimeUnit(TimeUnit);
73
74#[repr(transparent)]
75#[derive(Clone)]
76pub struct PyField(Field);
77
78impl<'a, 'py> FromPyObject<'a, 'py> for PyField {
79 type Error = PyErr;
80
81 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
82 let py = ob.py();
83 let name = ob
84 .getattr(intern!(py, "name"))?
85 .str()?
86 .extract::<PyBackedStr>()?;
87 let dtype = ob.getattr(intern!(py, "dtype"))?.extract::<PyDataType>()?;
88 let name: &str = name.as_ref();
89 Ok(PyField(Field::new(name.into(), dtype.0)))
90 }
91}
92
93impl<'a, 'py> FromPyObject<'a, 'py> for PyTimeUnit {
94 type Error = PyErr;
95
96 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
97 let parsed = match &*ob.extract::<PyBackedStr>()? {
98 "ns" => TimeUnit::Nanoseconds,
99 "us" => TimeUnit::Microseconds,
100 "ms" => TimeUnit::Milliseconds,
101 v => {
102 return Err(PyValueError::new_err(format!(
103 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
104 )));
105 },
106 };
107 Ok(PyTimeUnit(parsed))
108 }
109}
110
111impl<'py> IntoPyObject<'py> for PyTimeUnit {
112 type Target = PyString;
113 type Output = Bound<'py, Self::Target>;
114 type Error = Infallible;
115
116 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
117 let time_unit = match self.0 {
118 TimeUnit::Nanoseconds => "ns",
119 TimeUnit::Microseconds => "us",
120 TimeUnit::Milliseconds => "ms",
121 };
122 time_unit.into_pyobject(py)
123 }
124}
125
126impl From<PyDataFrame> for DataFrame {
127 fn from(value: PyDataFrame) -> Self {
128 value.0
129 }
130}
131
132impl From<PySeries> for Series {
133 fn from(value: PySeries) -> Self {
134 value.0
135 }
136}
137
/// Unwraps a [`PyLazyFrame`] into the polars [`LazyFrame`] it holds.
#[cfg(feature = "lazy")]
impl From<PyLazyFrame> for LazyFrame {
    fn from(PyLazyFrame(lf): PyLazyFrame) -> Self {
        lf
    }
}
144
145impl From<PySchema> for SchemaRef {
146 fn from(value: PySchema) -> Self {
147 value.0
148 }
149}
150
151impl AsRef<Series> for PySeries {
152 fn as_ref(&self) -> &Series {
153 &self.0
154 }
155}
156
157impl AsRef<DataFrame> for PyDataFrame {
158 fn as_ref(&self) -> &DataFrame {
159 &self.0
160 }
161}
162
/// Borrows the wrapped [`LazyFrame`].
#[cfg(feature = "lazy")]
impl AsRef<LazyFrame> for PyLazyFrame {
    fn as_ref(&self) -> &LazyFrame {
        let Self(lf) = self;
        lf
    }
}
169
170impl AsRef<Schema> for PySchema {
171 fn as_ref(&self) -> &Schema {
172 self.0.as_ref()
173 }
174}
175
176impl<'a, 'py> FromPyObject<'a, 'py> for PySeries {
177 type Error = PyErr;
178
179 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
180 let mut export = SeriesExport::empty();
181 ob.getattr("_s")?
182 .call_method1("_export", ((&raw mut export).addr(),))?;
183 let series = unsafe { import_series(export).map_err(PyPolarsErr::from)? };
184 Ok(PySeries(series))
185 }
186}
187
188impl<'a, 'py> FromPyObject<'a, 'py> for PyDataFrame {
189 type Error = PyErr;
190
191 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
192 let series = ob.call_method0("get_columns")?;
193 let n = ob.getattr("width")?.extract::<usize>()?;
194 let mut columns = Vec::with_capacity(n);
195 for pyseries in series.try_iter()? {
196 let pyseries = pyseries?;
197 let s = pyseries.extract::<PySeries>()?.0;
198 columns.push(s.into_column());
199 }
200 unsafe { Ok(PyDataFrame(DataFrame::new_unchecked_infer_height(columns))) }
201 }
202}
203
/// Rebuilds a [`LazyFrame`] from the serialized state of a Python object.
#[cfg(feature = "lazy")]
impl<'a, 'py> FromPyObject<'a, 'py> for PyLazyFrame {
    type Error = PyErr;

    /// Calls `ob.__getstate__()`, expects `bytes`, and deserializes them as a
    /// versioned [`DslPlan`].
    ///
    /// # Errors
    /// Returns the Python error if `__getstate__` fails or does not return
    /// `bytes`, or a [`PyPolarsErr::Other`] if the plan cannot be
    /// deserialized.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let state = ob.call_method0("__getstate__")?;
        let bytes = state.extract::<Bound<'_, PyBytes>>()?;

        let plan = DslPlan::deserialize_versioned(bytes.as_bytes()).map_err(|e| {
            PyPolarsErr::Other(format!(
                "Error when deserializing LazyFrame. This may be due to mismatched polars versions. {e}"
            ))
        })?;

        Ok(PyLazyFrame(plan.into()))
    }
}
222
/// Rebuilds an [`Expr`] from the serialized state of a Python object.
#[cfg(feature = "lazy")]
impl<'a, 'py> FromPyObject<'a, 'py> for PyExpr {
    type Error = PyErr;

    /// Calls `ob.__getstate__()`, extracts the result as a byte vector, and
    /// deserializes it with the default [`pl_serialize::SerializeOptions`].
    ///
    /// # Errors
    /// Returns the Python error if `__getstate__` fails or its result cannot
    /// be extracted, or a [`PyPolarsErr::Other`] if deserialization fails.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let state: Vec<u8> = ob.call_method0("__getstate__")?.extract()?;

        let expr: Expr = pl_serialize::SerializeOptions::default()
            .deserialize_from_reader::<Expr, &[u8], false>(state.as_slice())
            .map_err(|e| {
                PyPolarsErr::Other(format!(
                    "Error when deserializing 'Expr'. This may be due to mismatched polars versions. {e}"
                ))
            })?;

        Ok(PyExpr(expr))
    }
}
241
242impl<'py> IntoPyObject<'py> for PySeries {
243 type Target = PyAny;
244 type Output = Bound<'py, Self::Target>;
245 type Error = PyErr;
246
247 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
248 let s = SERIES.bind(py);
249 let series_export = ManuallyDrop::new(export_series(&self.0));
250 s.call_method1("_import", ((&raw const series_export).addr(),))
251 }
252}
253
254impl<'py> IntoPyObject<'py> for PyDataFrame {
255 type Target = PyAny;
256 type Output = Bound<'py, Self::Target>;
257 type Error = PyErr;
258
259 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
260 let pyseries = self
261 .0
262 .columns()
263 .iter()
264 .map(|s| PySeries(s.as_materialized_series().clone()).into_pyobject(py))
265 .collect::<PyResult<Vec<_>>>()?;
266
267 let polars = POLARS.bind(py);
268 polars.call_method1("DataFrame", (pyseries,))
269 }
270}
271
/// Converts a lazy frame into a Python `LazyFrame` by serializing its plan.
#[cfg(feature = "lazy")]
impl<'py> IntoPyObject<'py> for PyLazyFrame {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Creates an uninitialized `POLARS.LazyFrame` via `__new__`, serializes
    /// the logical plan in the versioned format, and feeds the bytes to the
    /// instance's `__setstate__`.
    ///
    /// # Errors
    /// Returns the Python error if the class lookup, `__new__` or
    /// `__setstate__` fails, or a [`PyPolarsErr::Other`] if the plan cannot be
    /// serialized. None of these cases panic.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        use polars::prelude::PlanSerializationContext;

        let polars = POLARS.bind(py);
        let cls = polars.getattr("LazyFrame")?;
        // Propagate a failing `__new__` instead of panicking, matching the
        // `PyExpr` conversion.
        let instance = cls.call_method1(intern!(py, "__new__"), (&cls,))?;

        let mut buf = vec![];
        self.0
            .logical_plan
            .serialize_versioned(&mut buf, PlanSerializationContext::default())
            .map_err(|e| PyPolarsErr::Other(format!("Error when serializing LazyFrame. {e}")))?;

        instance.call_method1("__setstate__", (&buf,))?;
        Ok(instance)
    }
}
294
/// Converts an expression into a Python `Expr` by serializing it.
#[cfg(feature = "lazy")]
impl<'py> IntoPyObject<'py> for PyExpr {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Creates an uninitialized `POLARS.Expr` via `__new__`, serializes the
    /// expression with the default [`pl_serialize::SerializeOptions`], and
    /// feeds the bytes to the instance's `__setstate__`.
    ///
    /// # Errors
    /// Returns the Python error if the class lookup or `__new__` fails, a
    /// [`PyPolarsErr::Other`] if serialization fails (previously a panic), or
    /// a `ValueError` wrapping any error raised by `__setstate__`.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let polars = POLARS.bind(py);
        let cls = polars.getattr("Expr")?;
        let instance = cls.call_method1(intern!(py, "__new__"), (&cls,))?;

        let buf = pl_serialize::SerializeOptions::default()
            .serialize_to_bytes::<Expr, false>(&self.0)
            .map_err(|e| PyPolarsErr::Other(format!("Error when serializing 'Expr'. {e}")))?;

        // `__setstate__` deserializes on the Python side, hence the wording of
        // the error message.
        instance
            .call_method1("__setstate__", (&buf,))
            .map_err(|err| PyValueError::new_err(format!("deserialization failed: {err}")))?;
        Ok(instance)
    }
}
319
/// Wraps `s` by calling `SERIES._from_pyseries(s)` and returns the result as
/// an unbound Python object.
///
/// # Panics
/// Panics if `_from_pyseries` cannot be looked up or if calling it raises.
#[cfg(feature = "dtype-categorical")]
pub(crate) fn to_series(py: Python, s: PySeries) -> Py<PyAny> {
    let series_cls = SERIES.bind(py);
    let from_pyseries = series_cls
        .getattr(intern!(py, "_from_pyseries"))
        .unwrap();
    let wrapped = from_pyseries.call1((s,)).unwrap();
    wrapped.unbind()
}
333
334impl<'py> IntoPyObject<'py> for PyDataType {
335 type Target = PyAny;
336 type Output = Bound<'py, Self::Target>;
337 type Error = PyErr;
338
339 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
340 let pl = POLARS.bind(py);
341
342 match &self.0 {
343 DataType::Int8 => {
344 let class = pl.getattr(intern!(py, "Int8")).unwrap();
345 class.call0()
346 },
347 DataType::Int16 => {
348 let class = pl.getattr(intern!(py, "Int16")).unwrap();
349 class.call0()
350 },
351 DataType::Int32 => {
352 let class = pl.getattr(intern!(py, "Int32")).unwrap();
353 class.call0()
354 },
355 DataType::Int64 => {
356 let class = pl.getattr(intern!(py, "Int64")).unwrap();
357 class.call0()
358 },
359 DataType::Int128 => {
360 let class = pl.getattr(intern!(py, "Int128")).unwrap();
361 class.call0()
362 },
363 DataType::UInt8 => {
364 let class = pl.getattr(intern!(py, "UInt8")).unwrap();
365 class.call0()
366 },
367 DataType::UInt16 => {
368 let class = pl.getattr(intern!(py, "UInt16")).unwrap();
369 class.call0()
370 },
371 DataType::UInt32 => {
372 let class = pl.getattr(intern!(py, "UInt32")).unwrap();
373 class.call0()
374 },
375 DataType::UInt64 => {
376 let class = pl.getattr(intern!(py, "UInt64")).unwrap();
377 class.call0()
378 },
379 DataType::UInt128 => {
380 let class = pl.getattr(intern!(py, "UInt128")).unwrap();
381 class.call0()
382 },
383 DataType::Float16 => {
384 let class = pl.getattr(intern!(py, "Float16")).unwrap();
385 class.call0()
386 },
387 DataType::Float32 => {
388 let class = pl.getattr(intern!(py, "Float32")).unwrap();
389 class.call0()
390 },
391 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
392 let class = pl.getattr(intern!(py, "Float64")).unwrap();
393 class.call0()
394 },
395 #[cfg(feature = "dtype-decimal")]
396 DataType::Decimal(precision, scale) => {
397 let class = pl.getattr(intern!(py, "Decimal")).unwrap();
398 let args = (*precision, *scale);
399 class.call1(args)
400 },
401 DataType::Boolean => {
402 let class = pl.getattr(intern!(py, "Boolean")).unwrap();
403 class.call0()
404 },
405 DataType::String | DataType::Unknown(UnknownKind::Str) => {
406 let class = pl.getattr(intern!(py, "String")).unwrap();
407 class.call0()
408 },
409 DataType::Binary => {
410 let class = pl.getattr(intern!(py, "Binary")).unwrap();
411 class.call0()
412 },
413 #[cfg(feature = "dtype-array")]
414 DataType::Array(inner, size) => {
415 let class = pl.getattr(intern!(py, "Array")).unwrap();
416 let inner = PyDataType(*inner.clone()).into_pyobject(py)?;
417 let args = (inner, *size);
418 class.call1(args)
419 },
420 DataType::List(inner) => {
421 let class = pl.getattr(intern!(py, "List")).unwrap();
422 let inner = PyDataType(*inner.clone()).into_pyobject(py)?;
423 class.call1((inner,))
424 },
425 DataType::Date => {
426 let class = pl.getattr(intern!(py, "Date")).unwrap();
427 class.call0()
428 },
429 DataType::Datetime(tu, tz) => {
430 let datetime_class = pl.getattr(intern!(py, "Datetime")).unwrap();
431 datetime_class.call1((tu.to_ascii(), tz.as_ref().map(|s| s.as_str())))
432 },
433 DataType::Duration(tu) => {
434 let duration_class = pl.getattr(intern!(py, "Duration")).unwrap();
435 duration_class.call1((tu.to_ascii(),))
436 },
437 #[cfg(feature = "object")]
438 DataType::Object(_) => {
439 let class = pl.getattr(intern!(py, "Object")).unwrap();
440 class.call0()
441 },
442 #[cfg(feature = "dtype-categorical")]
443 DataType::Categorical(_, _) => {
444 let class = pl.getattr(intern!(py, "Categorical")).unwrap();
445 class.call1(())
446 },
447 #[cfg(feature = "dtype-categorical")]
448 DataType::Enum(categories, _) => {
449 let class = pl.getattr(intern!(py, "Enum")).unwrap();
451 let s =
452 Series::from_arrow("category".into(), categories.categories().clone().boxed())
453 .unwrap();
454 let series = to_series(py, PySeries(s));
455 class.call1((series,))
456 },
457 DataType::Time => pl.getattr(intern!(py, "Time")),
458 #[cfg(feature = "dtype-struct")]
459 DataType::Struct(fields) => {
460 let field_class = pl.getattr(intern!(py, "Field")).unwrap();
461 let iter = fields
462 .iter()
463 .map(|fld| {
464 let name = fld.name().as_str();
465 let dtype = PyDataType(fld.dtype().clone()).into_pyobject(py)?;
466 field_class.call1((name, dtype))
467 })
468 .collect::<PyResult<Vec<_>>>()?;
469 let fields = PyList::new(py, iter)?;
470 let struct_class = pl.getattr(intern!(py, "Struct")).unwrap();
471 struct_class.call1((fields,))
472 },
473 DataType::Null => {
474 let class = pl.getattr(intern!(py, "Null")).unwrap();
475 class.call0()
476 },
477 DataType::Unknown(UnknownKind::Int(v)) => {
478 PyDataType(materialize_dyn_int(*v).dtype()).into_pyobject(py)
479 },
480 DataType::Unknown(_) => {
481 let class = pl.getattr(intern!(py, "Unknown")).unwrap();
482 class.call0()
483 },
484 DataType::BinaryOffset => {
485 panic!("this type isn't exposed to python")
486 },
487 #[allow(unreachable_patterns)]
488 _ => panic!("activate dtype"),
489 }
490 }
491}
492
493impl<'py> IntoPyObject<'py> for PySchema {
494 type Target = PyDict;
495 type Output = Bound<'py, Self::Target>;
496 type Error = PyErr;
497
498 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
499 let dict = PyDict::new(py);
500 for (k, v) in self.0.iter() {
501 dict.set_item(k.as_str(), PyDataType(v.clone()).into_pyobject(py)?)?;
502 }
503 Ok(dict)
504 }
505}
506
507impl<'a, 'py> FromPyObject<'a, 'py> for PyDataType {
508 type Error = PyErr;
509
510 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
511 let py = ob.py();
512 let type_name = ob.get_type().qualname()?.to_string();
513
514 let dtype = match type_name.as_ref() {
515 "DataTypeClass" => {
516 let name = ob
518 .getattr(intern!(py, "__name__"))?
519 .str()?
520 .extract::<PyBackedStr>()?;
521 match &*name {
522 "Int8" => DataType::Int8,
523 "Int16" => DataType::Int16,
524 "Int32" => DataType::Int32,
525 "Int64" => DataType::Int64,
526 "Int128" => DataType::Int128,
527 "UInt8" => DataType::UInt8,
528 "UInt16" => DataType::UInt16,
529 "UInt32" => DataType::UInt32,
530 "UInt64" => DataType::UInt64,
531 "UInt128" => DataType::UInt128,
532 "Float16" => DataType::Float16,
533 "Float32" => DataType::Float32,
534 "Float64" => DataType::Float64,
535 "Boolean" => DataType::Boolean,
536 "String" => DataType::String,
537 "Binary" => DataType::Binary,
538 #[cfg(feature = "dtype-categorical")]
539 "Categorical" => {
540 DataType::Categorical(Categories::global(), Categories::global().mapping())
541 },
542 #[cfg(feature = "dtype-categorical")]
543 "Enum" => {
544 let categories = FrozenCategories::new([]).unwrap();
545 let mapping = categories.mapping().clone();
546 DataType::Enum(categories, mapping)
547 },
548 "Date" => DataType::Date,
549 "Time" => DataType::Time,
550 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
551 "Duration" => DataType::Duration(TimeUnit::Microseconds),
552 #[cfg(feature = "dtype-decimal")]
553 "Decimal" => {
554 return Err(PyTypeError::new_err("Decimal without specifying precision and scale is not a valid Polars data type".to_string()));
555 },
556 "List" => DataType::List(Box::new(DataType::Null)),
557 #[cfg(feature = "dtype-array")]
558 "Array" => DataType::Array(Box::new(DataType::Null), 0),
559 #[cfg(feature = "dtype-struct")]
560 "Struct" => DataType::Struct(vec![]),
561 "Null" => DataType::Null,
562 #[cfg(feature = "object")]
563 "Object" => todo!(),
564 "Unknown" => DataType::Unknown(Default::default()),
565 dt => {
566 return Err(PyTypeError::new_err(format!(
567 "'{dt}' is not a Polars data type, or the plugin isn't compiled with the right features",
568 )));
569 },
570 }
571 },
572 "Int8" => DataType::Int8,
573 "Int16" => DataType::Int16,
574 "Int32" => DataType::Int32,
575 "Int64" => DataType::Int64,
576 "Int128" => DataType::Int128,
577 "UInt8" => DataType::UInt8,
578 "UInt16" => DataType::UInt16,
579 "UInt32" => DataType::UInt32,
580 "UInt64" => DataType::UInt64,
581 "UInt128" => DataType::UInt128,
582 "Float16" => DataType::Float16,
583 "Float32" => DataType::Float32,
584 "Float64" => DataType::Float64,
585 "Boolean" => DataType::Boolean,
586 "String" => DataType::String,
587 "Binary" => DataType::Binary,
588 #[cfg(feature = "dtype-categorical")]
589 "Categorical" => {
590 DataType::Categorical(Categories::global(), Categories::global().mapping())
591 },
592 #[cfg(feature = "dtype-categorical")]
593 "Enum" => {
594 let categories = ob.getattr(intern!(py, "categories")).unwrap();
595 let s = get_series(&categories.as_borrowed())?;
596 let ca = s.str().map_err(PyPolarsErr::from)?;
597 let categories = ca.iter();
598 let categories = FrozenCategories::new(categories.map(|v| v.unwrap())).unwrap();
599 let mapping = categories.mapping().clone();
600 DataType::Enum(categories, mapping)
601 },
602 "Date" => DataType::Date,
603 "Time" => DataType::Time,
604 "Datetime" => {
605 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
606 let time_unit = time_unit.extract::<PyTimeUnit>()?.0;
607 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
608 let time_zone: Option<String> = time_zone.extract()?;
609 DataType::Datetime(time_unit, TimeZone::opt_try_new(time_zone).unwrap())
610 },
611 "Duration" => {
612 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
613 let time_unit = time_unit.extract::<PyTimeUnit>()?.0;
614 DataType::Duration(time_unit)
615 },
616 #[cfg(feature = "dtype-decimal")]
617 "Decimal" => {
618 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
619 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
620 DataType::Decimal(precision, scale)
621 },
622 "List" => {
623 let inner = ob.getattr(intern!(py, "inner")).unwrap();
624 let inner = inner.extract::<PyDataType>()?;
625 DataType::List(Box::new(inner.0))
626 },
627 #[cfg(feature = "dtype-array")]
628 "Array" => {
629 let inner = ob.getattr(intern!(py, "inner")).unwrap();
630 let size = ob.getattr(intern!(py, "size")).unwrap();
631 let inner = inner.extract::<PyDataType>()?;
632 let size = size.extract::<usize>()?;
633 DataType::Array(Box::new(inner.0), size)
634 },
635 #[cfg(feature = "dtype-struct")]
636 "Struct" => {
637 let fields = ob.getattr(intern!(py, "fields"))?;
638 let fields = fields
639 .extract::<Vec<PyField>>()?
640 .into_iter()
641 .map(|f| f.0)
642 .collect::<Vec<Field>>();
643 DataType::Struct(fields)
644 },
645 "Null" => DataType::Null,
646 #[cfg(feature = "object")]
647 "Object" => panic!("object not supported"),
648 "Unknown" => DataType::Unknown(Default::default()),
649 dt => {
650 return Err(PyTypeError::new_err(format!(
651 "'{dt}' is not a Polars data type, or the plugin isn't compiled with the right features",
652 )));
653 },
654 };
655 Ok(PyDataType(dtype))
656 }
657}