1use std::borrow::{Borrow, Cow};
2
3use chrono::{
4 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
5};
6use chrono_tz::Tz;
7#[cfg(feature = "object")]
8use polars::chunked_array::object::PolarsObjectSafe;
9#[cfg(feature = "object")]
10use polars::datatypes::OwnedObject;
11use polars::datatypes::{DataType, Field, PlHashMap, TimeUnit};
12use polars::prelude::{AnyValue, PlSmallStr, Series};
13use polars_core::utils::any_values_to_supertype_and_n_dtypes;
14use polars_core::utils::arrow::temporal_conversions::date32_to_date;
15use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
16use pyo3::prelude::*;
17use pyo3::types::{
18 PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple, PyType,
19};
20use pyo3::{intern, IntoPyObjectExt};
21
22use super::datetime::{
23 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
24};
25use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap};
26use crate::error::PyPolarsErr;
27use crate::py_modules::{pl_series, pl_utils};
28use crate::series::PySeries;
29
30impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
31 type Target = PyAny;
32 type Output = Bound<'py, Self::Target>;
33 type Error = PyErr;
34
35 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
36 any_value_into_py_object(self.0, py)
37 }
38}
39
40impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
41 type Target = PyAny;
42 type Output = Bound<'py, Self::Target>;
43 type Error = PyErr;
44
45 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
46 self.clone().into_pyobject(py)
47 }
48}
49
50impl<'py> FromPyObject<'py> for Wrap<AnyValue<'py>> {
51 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
52 py_object_to_any_value(ob, true, true).map(Wrap)
53 }
54}
55
56pub(crate) fn any_value_into_py_object<'py>(
57 av: AnyValue,
58 py: Python<'py>,
59) -> PyResult<Bound<'py, PyAny>> {
60 let utils = pl_utils(py).bind(py);
61 match av {
62 AnyValue::UInt8(v) => v.into_bound_py_any(py),
63 AnyValue::UInt16(v) => v.into_bound_py_any(py),
64 AnyValue::UInt32(v) => v.into_bound_py_any(py),
65 AnyValue::UInt64(v) => v.into_bound_py_any(py),
66 AnyValue::Int8(v) => v.into_bound_py_any(py),
67 AnyValue::Int16(v) => v.into_bound_py_any(py),
68 AnyValue::Int32(v) => v.into_bound_py_any(py),
69 AnyValue::Int64(v) => v.into_bound_py_any(py),
70 AnyValue::Int128(v) => v.into_bound_py_any(py),
71 AnyValue::Float32(v) => v.into_bound_py_any(py),
72 AnyValue::Float64(v) => v.into_bound_py_any(py),
73 AnyValue::Null => py.None().into_bound_py_any(py),
74 AnyValue::Boolean(v) => v.into_bound_py_any(py),
75 AnyValue::String(v) => v.into_bound_py_any(py),
76 AnyValue::StringOwned(v) => v.into_bound_py_any(py),
77 AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
78 let s = if arr.is_null() {
79 rev.get(idx)
80 } else {
81 unsafe { arr.deref_unchecked().value(idx as usize) }
82 };
83 s.into_bound_py_any(py)
84 },
85 AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
86 let s = if arr.is_null() {
87 rev.get(idx)
88 } else {
89 unsafe { arr.deref_unchecked().value(idx as usize) }
90 };
91 s.into_bound_py_any(py)
92 },
93 AnyValue::Date(v) => {
94 let date = date32_to_date(v);
95 date.into_bound_py_any(py)
96 },
97 AnyValue::Datetime(v, time_unit, time_zone) => {
98 datetime_to_py_object(py, v, time_unit, time_zone)
99 },
100 AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
101 datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
102 },
103 AnyValue::Duration(v, time_unit) => {
104 let time_delta = elapsed_offset_to_timedelta(v, time_unit);
105 time_delta.into_bound_py_any(py)
106 },
107 AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
108 AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
109 ref av @ AnyValue::Struct(_, _, flds) => {
110 Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
111 },
112 AnyValue::StructOwned(payload) => {
113 Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
114 },
115 #[cfg(feature = "object")]
116 AnyValue::Object(v) => {
117 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
118 Ok(object.inner.clone_ref(py).into_bound(py))
119 },
120 #[cfg(feature = "object")]
121 AnyValue::ObjectOwned(v) => {
122 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
123 Ok(object.inner.clone_ref(py).into_bound(py))
124 },
125 AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
126 AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
127 AnyValue::Decimal(v, scale) => {
128 let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
129 const N: usize = 3;
130 let mut buf = [0_u128; N];
131 let n_digits = decimal_to_digits(v.abs(), &mut buf);
132 let buf = unsafe {
133 std::slice::from_raw_parts(
134 buf.as_slice().as_ptr() as *const u8,
135 N * size_of::<u128>(),
136 )
137 };
138 let digits = PyTuple::new(py, buf.iter().take(n_digits))?;
139 convert.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32)))
140 },
141 }
142}
143
/// Hash-map key that pairs a Python type object with its pointer address.
///
/// The `PartialEq`, `Hash` and `Borrow<usize>` implementations for this type all operate on
/// `address` only, so a map keyed by `TypeObjectKey` can be queried with a plain `usize`.
#[derive(Debug)]
pub struct TypeObjectKey {
    /// Owned handle to the Python type object. It is never read back.
    /// NOTE(review): presumably held so the type object stays alive and its address cannot be
    /// reused while the key exists — confirm.
    #[allow(unused)]
    type_object: Py<PyType>,
    /// Pointer address of `type_object`; kept as a field so `Borrow<usize>` can return a
    /// reference to it.
    address: usize,
}
154
155impl TypeObjectKey {
156 fn new(type_object: Py<PyType>) -> Self {
157 let address = type_object.as_ptr() as usize;
158 Self {
159 type_object,
160 address,
161 }
162 }
163}
164
165impl PartialEq for TypeObjectKey {
166 fn eq(&self, other: &Self) -> bool {
167 self.address == other.address
168 }
169}
170
// Equality compares `usize` addresses, which is a total equivalence relation.
impl Eq for TypeObjectKey {}
172
173impl std::borrow::Borrow<usize> for TypeObjectKey {
174 fn borrow(&self) -> &usize {
175 &self.address
176 }
177}
178
179impl std::hash::Hash for TypeObjectKey {
180 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
181 let v: &usize = self.borrow();
182 v.hash(state)
183 }
184}
185
/// Signature shared by every Python-to-`AnyValue` converter: the Python object to convert and
/// the `strict` flag.
type InitFn = for<'py> fn(&Bound<'py, PyAny>, bool) -> PyResult<AnyValue<'py>>;
/// Cache from a Python type (keyed by the address of its type object) to the converter chosen
/// for that type. `py_object_to_any_value` reads it and adds an entry on the first encounter
/// of each type.
/// NOTE(review): the cell is created empty here; where the map itself gets installed is not
/// visible in this part of the file — confirm it happens before the first lookup.
pub(crate) static LUT: crate::gil_once_cell::GILOnceCell<PlHashMap<TypeObjectKey, InitFn>> =
    crate::gil_once_cell::GILOnceCell::new();
189
190pub(crate) fn py_object_to_any_value<'py>(
192 ob: &Bound<'py, PyAny>,
193 strict: bool,
194 allow_object: bool,
195) -> PyResult<AnyValue<'py>> {
196 fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
198 Ok(AnyValue::Null)
199 }
200
201 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
202 let b = ob.extract::<bool>()?;
203 Ok(AnyValue::Boolean(b))
204 }
205
206 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
207 if let Ok(v) = ob.extract::<i64>() {
208 Ok(AnyValue::Int64(v))
209 } else if let Ok(v) = ob.extract::<i128>() {
210 Ok(AnyValue::Int128(v))
211 } else if !strict {
212 let f = ob.extract::<f64>()?;
213 Ok(AnyValue::Float64(f))
214 } else {
215 Err(PyOverflowError::new_err(format!(
216 "int value too large for Polars integer types: {ob}"
217 )))
218 }
219 }
220
221 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
222 Ok(AnyValue::Float64(ob.extract::<f64>()?))
223 }
224
225 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
226 Ok(AnyValue::StringOwned(ob.extract::<String>()?.into()))
238 }
239
240 fn get_bytes<'py>(ob: &Bound<'py, PyAny>, _strict: bool) -> PyResult<AnyValue<'py>> {
241 let value = ob.extract::<Vec<u8>>()?;
242 Ok(AnyValue::BinaryOwned(value))
243 }
244
245 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
246 const UNIX_EPOCH: NaiveDate = NaiveDateTime::UNIX_EPOCH.date();
247 let date = ob.extract::<NaiveDate>()?;
248 let elapsed = date.signed_duration_since(UNIX_EPOCH);
249 Ok(AnyValue::Date(elapsed.num_days() as i32))
250 }
251
252 fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
253 let py = ob.py();
254 let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
255
256 let timestamp = if tzinfo.is_none() {
257 let datetime = ob.extract::<NaiveDateTime>()?;
258 let delta = datetime - NaiveDateTime::UNIX_EPOCH;
259 delta.num_microseconds().unwrap()
260 } else if tzinfo.hasattr(intern!(py, "key"))? {
261 let datetime = ob.extract::<DateTime<Tz>>()?;
262 if datetime.year() >= 2100 {
263 pl_utils(py)
266 .bind(py)
267 .getattr(intern!(py, "datetime_to_int"))?
268 .call1((ob, intern!(py, "us")))?
269 .extract::<i64>()?
270 } else {
271 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
272 delta.num_microseconds().unwrap()
273 }
274 } else {
275 let datetime = ob.extract::<DateTime<FixedOffset>>()?;
276 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
277 delta.num_microseconds().unwrap()
278 };
279
280 Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None))
281 }
282
283 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
284 let timedelta = ob.extract::<TimeDelta>()?;
285 if let Some(micros) = timedelta.num_microseconds() {
286 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
287 } else {
288 Ok(AnyValue::Duration(
289 timedelta.num_milliseconds(),
290 TimeUnit::Milliseconds,
291 ))
292 }
293 }
294
295 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
296 let time = ob.extract::<NaiveTime>()?;
297
298 Ok(AnyValue::Time(
299 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
300 ))
301 }
302
303 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
304 fn abs_decimal_from_digits(
305 digits: impl IntoIterator<Item = u8>,
306 exp: i32,
307 ) -> Option<(i128, usize)> {
308 const MAX_ABS_DEC: i128 = 10_i128.pow(38) - 1;
309 let mut v = 0_i128;
310 for (i, d) in digits.into_iter().map(i128::from).enumerate() {
311 if i < 38 {
312 v = v * 10 + d;
313 } else {
314 v = v.checked_mul(10).and_then(|v| v.checked_add(d))?;
315 }
316 }
317 let scale = if exp > 0 {
319 v = 10_i128
321 .checked_pow(exp as u32)
322 .and_then(|factor| v.checked_mul(factor))?;
323 0
324 } else {
325 (-exp) as usize
326 };
327 (v <= MAX_ABS_DEC).then_some((v, scale))
329 }
330
331 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
333 .call_method0(intern!(ob.py(), "as_tuple"))
334 .unwrap()
335 .extract()
336 .unwrap();
337 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
338 PyErr::from(PyPolarsErr::Other(
339 "Decimal is too large to fit in Decimal128".into(),
340 ))
341 })?;
342 if sign > 0 {
343 v = -v; }
345 Ok(AnyValue::Decimal(v, scale))
346 }
347
348 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
349 fn get_list_with_constructor(
350 ob: &Bound<'_, PyAny>,
351 strict: bool,
352 ) -> PyResult<AnyValue<'static>> {
353 let py = ob.py();
357 let kwargs = PyDict::new(py);
358 kwargs.set_item("strict", strict)?;
359 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
360 get_list_from_series(s.bind(py), strict)
361 }
362
363 if ob.is_empty()? {
364 Ok(AnyValue::List(Series::new_empty(
365 PlSmallStr::EMPTY,
366 &DataType::Null,
367 )))
368 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
369 const INFER_SCHEMA_LENGTH: usize = 25;
370
371 let list = ob.downcast::<PySequence>()?;
372
373 let mut avs = Vec::with_capacity(INFER_SCHEMA_LENGTH);
374 let mut iter = list.try_iter()?;
375 let mut items = Vec::with_capacity(INFER_SCHEMA_LENGTH);
376 for item in (&mut iter).take(INFER_SCHEMA_LENGTH) {
377 items.push(item?);
378 let av = py_object_to_any_value(items.last().unwrap(), strict, true)?;
379 avs.push(av)
380 }
381 let (dtype, n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
382 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
383
384 if dtype.is_primitive() && n_dtypes == 1 {
386 get_list_with_constructor(ob, strict)
387 } else {
388 let length = list.len()?;
390 avs.reserve(length);
391 let mut rest = Vec::with_capacity(length);
392 for item in iter {
393 rest.push(item?);
394 let av = py_object_to_any_value(rest.last().unwrap(), strict, true)?;
395 avs.push(av)
396 }
397
398 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
399 .map_err(|e| {
400 PyTypeError::new_err(format!(
401 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
402 ))
403 })?;
404 Ok(AnyValue::List(s))
405 }
406 } else {
407 get_list_with_constructor(ob, strict)
409 }
410 }
411
412 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
413 let s = super::get_series(ob)?;
414 Ok(AnyValue::List(s))
415 }
416
417 fn get_struct<'py>(ob: &Bound<'py, PyAny>, strict: bool) -> PyResult<AnyValue<'py>> {
418 let dict = ob.downcast::<PyDict>().unwrap();
419 let len = dict.len();
420 let mut keys = Vec::with_capacity(len);
421 let mut vals = Vec::with_capacity(len);
422 for (k, v) in dict.into_iter() {
423 let key = k.extract::<Cow<str>>()?;
424 let val = py_object_to_any_value(&v, strict, true)?;
425 let dtype = val.dtype();
426 keys.push(Field::new(key.as_ref().into(), dtype));
427 vals.push(val)
428 }
429 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
430 }
431
432 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
433 #[cfg(feature = "object")]
434 {
435 let v = &ObjectValue {
437 inner: ob.clone().unbind(),
438 };
439 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
440 }
441 #[cfg(not(feature = "object"))]
442 panic!("activate object")
443 }
444
445 fn get_conversion_function(
450 ob: &Bound<'_, PyAny>,
451 py: Python<'_>,
452 allow_object: bool,
453 ) -> PyResult<InitFn> {
454 if ob.is_none() {
455 Ok(get_null)
456 }
457 else if ob.is_instance_of::<PyBool>() {
459 Ok(get_bool)
460 } else if ob.is_instance_of::<PyInt>() {
461 Ok(get_int)
462 } else if ob.is_instance_of::<PyFloat>() {
463 Ok(get_float)
464 } else if ob.is_instance_of::<PyString>() {
465 Ok(get_str)
466 } else if ob.is_instance_of::<PyBytes>() {
467 Ok(get_bytes)
468 } else if ob.is_instance_of::<PyList>() || ob.is_instance_of::<PyTuple>() {
469 Ok(get_list)
470 } else if ob.is_instance_of::<PyDict>() {
471 Ok(get_struct)
472 } else {
473 let ob_type = ob.get_type();
474 let type_name = ob_type.fully_qualified_name()?.to_string();
475 match type_name.as_str() {
476 "datetime.date" => Ok(get_date as InitFn),
479 "datetime.time" => Ok(get_time as InitFn),
480 "datetime.datetime" => Ok(get_datetime as InitFn),
481 "datetime.timedelta" => Ok(get_timedelta as InitFn),
482 "decimal.Decimal" => Ok(get_decimal as InitFn),
483 "range" => Ok(get_list as InitFn),
484 _ => {
485 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
487 return Ok(get_int as InitFn);
488 } else if ob.extract::<f64>().is_ok() {
489 return Ok(get_float as InitFn);
490 }
491
492 let ancestors = ob_type.getattr(intern!(py, "__mro__"))?;
494 let ancestors_str_iter = ancestors
495 .try_iter()?
496 .map(|b| b.unwrap().str().unwrap().to_string());
497 for c in ancestors_str_iter {
498 match &*c {
499 "<class 'datetime.datetime'>" => {
502 return Ok(get_datetime as InitFn);
503 },
504 "<class 'datetime.date'>" => return Ok(get_date as InitFn),
505 "<class 'datetime.timedelta'>" => return Ok(get_timedelta as InitFn),
506 "<class 'datetime.time'>" => return Ok(get_time as InitFn),
507 _ => (),
508 }
509 }
510
511 if allow_object {
512 Ok(get_object as InitFn)
513 } else {
514 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
515 }
516 },
517 }
518 }
519 }
520
521 let py_type = ob.get_type();
522 let py_type_address = py_type.as_ptr() as usize;
523
524 Python::with_gil(move |py| {
525 LUT.with_gil(py, move |lut| {
526 if !lut.contains_key(&py_type_address) {
527 let k = TypeObjectKey::new(py_type.clone().unbind());
528
529 assert_eq!(k.address, py_type_address);
530
531 unsafe {
532 lut.insert_unique_unchecked(k, get_conversion_function(ob, py, allow_object)?);
533 }
534 }
535
536 let conversion_func = lut.get(&py_type_address).unwrap();
537 conversion_func(ob, strict)
538 })
539 })
540}