1use std::borrow::{Borrow, Cow};
2use std::sync::{Arc, Mutex};
3
4use chrono::{
5 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
6};
7use chrono_tz::Tz;
8use hashbrown::HashMap;
9#[cfg(feature = "object")]
10use polars::chunked_array::object::PolarsObjectSafe;
11#[cfg(feature = "object")]
12use polars::datatypes::OwnedObject;
13use polars::datatypes::{DataType, Field, TimeUnit};
14use polars::prelude::{AnyValue, PlSmallStr, Series};
15use polars_core::utils::any_values_to_supertype_and_n_dtypes;
16use polars_core::utils::arrow::temporal_conversions::date32_to_date;
17use polars_utils::aliases::PlFixedStateQuality;
18use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
19use pyo3::prelude::*;
20use pyo3::pybacked::PyBackedStr;
21use pyo3::types::{
22 PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple, PyType,
23};
24use pyo3::{IntoPyObjectExt, intern};
25
26use super::datetime::{
27 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
28};
29use super::{ObjectValue, Wrap, decimal_to_digits, struct_dict};
30use crate::error::PyPolarsErr;
31use crate::py_modules::{pl_series, pl_utils};
32use crate::series::PySeries;
33
34impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
35 type Target = PyAny;
36 type Output = Bound<'py, Self::Target>;
37 type Error = PyErr;
38
39 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
40 any_value_into_py_object(self.0, py)
41 }
42}
43
44impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
45 type Target = PyAny;
46 type Output = Bound<'py, Self::Target>;
47 type Error = PyErr;
48
49 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
50 self.clone().into_pyobject(py)
51 }
52}
53
54impl<'py> FromPyObject<'py> for Wrap<AnyValue<'py>> {
55 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
56 py_object_to_any_value(ob, true, true).map(Wrap)
57 }
58}
59
/// Convert a Polars [`AnyValue`] into a Python object.
///
/// Each variant is matched and converted individually:
/// - integers, floats, booleans, strings, dates, datetimes, durations and times are
///   converted with `into_bound_py_any` (temporal values first go through the helper
///   functions imported from `super::datetime` / `date32_to_date`);
/// - categorical / enum values are resolved to their string before conversion;
/// - lists and arrays are wrapped in a `PySeries` and converted with `to_list`;
/// - structs are converted with `struct_dict`;
/// - binary values become `PyBytes`;
/// - decimals are passed to the Python callable `to_py_decimal` looked up on the
///   object returned by `pl_utils`.
///
/// # Errors
/// Returns the `PyErr` produced by whichever conversion or Python call fails.
///
/// # Panics
/// With the `object` feature enabled, panics if an `Object` / `ObjectOwned` value does
/// not downcast to `ObjectValue`.
pub(crate) fn any_value_into_py_object<'py>(
    av: AnyValue,
    py: Python<'py>,
) -> PyResult<Bound<'py, PyAny>> {
    // Bound up front, although only the `Decimal` arm below reads it.
    let utils = pl_utils(py).bind(py);
    match av {
        AnyValue::UInt8(v) => v.into_bound_py_any(py),
        AnyValue::UInt16(v) => v.into_bound_py_any(py),
        AnyValue::UInt32(v) => v.into_bound_py_any(py),
        AnyValue::UInt64(v) => v.into_bound_py_any(py),
        AnyValue::Int8(v) => v.into_bound_py_any(py),
        AnyValue::Int16(v) => v.into_bound_py_any(py),
        AnyValue::Int32(v) => v.into_bound_py_any(py),
        AnyValue::Int64(v) => v.into_bound_py_any(py),
        AnyValue::Int128(v) => v.into_bound_py_any(py),
        AnyValue::Float32(v) => v.into_bound_py_any(py),
        AnyValue::Float64(v) => v.into_bound_py_any(py),
        AnyValue::Null => py.None().into_bound_py_any(py),
        AnyValue::Boolean(v) => v.into_bound_py_any(py),
        AnyValue::String(v) => v.into_bound_py_any(py),
        AnyValue::StringOwned(v) => v.into_bound_py_any(py),
        AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
            // Look the string up in the reverse mapping unless a non-null array pointer is
            // attached, in which case the string is read from that array instead.
            let s = if arr.is_null() {
                rev.get(idx)
            } else {
                // SAFETY(review): only reached when `arr.is_null()` is false; presumably the
                // pointer then refers to a live array for which `idx` is in bounds — confirm
                // against the `AnyValue` invariants.
                unsafe { arr.deref_unchecked().value(idx as usize) }
            };
            s.into_bound_py_any(py)
        },
        AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
            // Same lookup as the borrowed categorical / enum arm above.
            let s = if arr.is_null() {
                rev.get(idx)
            } else {
                // SAFETY(review): same assumption as in the borrowed arm — a non-null `arr`
                // is presumably valid and `idx` in bounds; confirm.
                unsafe { arr.deref_unchecked().value(idx as usize) }
            };
            s.into_bound_py_any(py)
        },
        AnyValue::Date(v) => {
            let date = date32_to_date(v);
            date.into_bound_py_any(py)
        },
        AnyValue::Datetime(v, time_unit, time_zone) => {
            datetime_to_py_object(py, v, time_unit, time_zone)
        },
        AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
            // Borrow the owned time zone so the same helper as the borrowed arm can be used.
            datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
        },
        AnyValue::Duration(v, time_unit) => {
            let time_delta = elapsed_offset_to_timedelta(v, time_unit);
            time_delta.into_bound_py_any(py)
        },
        AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
        // Nested values: wrap the inner series and let `PySeries::to_list` convert it.
        AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
        // `ref av @` keeps the whole value available so `_iter_struct_av` can be called on it
        // while `flds` is bound at the same time.
        ref av @ AnyValue::Struct(_, _, flds) => {
            Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
        },
        AnyValue::StructOwned(payload) => {
            // The payload is a (values, fields) pair.
            Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
        },
        #[cfg(feature = "object")]
        AnyValue::Object(v) => {
            // Panics if the stored object is not an `ObjectValue`.
            let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
            Ok(object.inner.clone_ref(py).into_bound(py))
        },
        #[cfg(feature = "object")]
        AnyValue::ObjectOwned(v) => {
            // Panics if the stored object is not an `ObjectValue`.
            let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
            Ok(object.inner.clone_ref(py).into_bound(py))
        },
        AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
        AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
        AnyValue::Decimal(v, scale) => {
            let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
            // Scratch space of N 128-bit words that `decimal_to_digits` fills for `|v|`.
            const N: usize = 3;
            let mut buf = [0_u128; N];
            let n_digits = decimal_to_digits(v.abs(), &mut buf);
            // SAFETY: the byte view covers exactly `N * size_of::<u128>()` bytes, i.e. the
            // whole of `buf`, and `u8` has no alignment requirement. Presumably
            // `decimal_to_digits` wrote one digit per byte — confirm against its definition.
            let buf = unsafe {
                std::slice::from_raw_parts(
                    buf.as_slice().as_ptr() as *const u8,
                    N * size_of::<u128>(),
                )
            };
            // Only the first `n_digits` bytes are passed on to Python.
            let digits = PyTuple::new(py, buf.iter().take(n_digits))?;
            // Arguments: sign flag (1 when negative), digit tuple, digit count, negated scale.
            convert.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32)))
        },
    }
}
147
/// Hash-map key that identifies a Python type object by its pointer address.
///
/// Equality and hashing (see the trait impls for this type) only consider `address`.
#[derive(Debug)]
pub struct TypeObjectKey {
    /// Owned reference to the Python type object. It is never read; presumably it is held
    /// so the type object stays alive and its address cannot be reused while the key
    /// exists — confirm.
    #[allow(unused)]
    type_object: Py<PyType>,
    /// Pointer value of `type_object`, captured when the key is created.
    address: usize,
}
158
159impl TypeObjectKey {
160 fn new(type_object: Py<PyType>) -> Self {
161 let address = type_object.as_ptr() as usize;
162 Self {
163 type_object,
164 address,
165 }
166 }
167}
168
169impl PartialEq for TypeObjectKey {
170 fn eq(&self, other: &Self) -> bool {
171 self.address == other.address
172 }
173}
174
// Equality compares plain `usize` addresses, so it is a total equivalence relation.
impl Eq for TypeObjectKey {}
176
177impl std::borrow::Borrow<usize> for TypeObjectKey {
178 fn borrow(&self) -> &usize {
179 &self.address
180 }
181}
182
183impl std::hash::Hash for TypeObjectKey {
184 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
185 let v: &usize = self.borrow();
186 v.hash(state)
187 }
188}
189
/// Signature shared by the conversion functions: takes the Python object and the `strict`
/// flag and returns the resulting `AnyValue`.
type InitFn = for<'py> fn(&Bound<'py, PyAny>, bool) -> PyResult<AnyValue<'py>>;
/// Global, mutex-guarded lookup table from Python type object (keyed by address through
/// [`TypeObjectKey`]) to the conversion function for that type. The hasher is created
/// with a fixed seed of 0.
pub(crate) static LUT: Mutex<HashMap<TypeObjectKey, InitFn, PlFixedStateQuality>> =
    Mutex::new(HashMap::with_hasher(PlFixedStateQuality::with_seed(0)));
193
/// Convert a Python object into a Polars [`AnyValue`].
///
/// The conversion function is selected from the object's Python type. The selection is
/// looked up in the global `LUT` first and, on a miss, determined by the nested
/// `get_conversion_function` and stored in `LUT`.
///
/// # Arguments
/// * `ob` - the Python object to convert.
/// * `strict` - forwarded to the selected conversion function.
/// * `allow_object` - forwarded to `get_conversion_function`, which uses it to decide
///   whether an unrecognised type falls back to an object value or is an error.
///
/// # Errors
/// Returns the `PyErr` raised while selecting the conversion function or while converting.
pub(crate) fn py_object_to_any_value<'py>(
    ob: &Bound<'py, PyAny>,
    strict: bool,
    allow_object: bool,
) -> PyResult<AnyValue<'py>> {
    /// Conversion function that ignores both arguments and always yields `AnyValue::Null`.
    fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
        Ok(AnyValue::Null)
    }
204
205 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
206 let b = ob.extract::<bool>()?;
207 Ok(AnyValue::Boolean(b))
208 }
209
210 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
211 if let Ok(v) = ob.extract::<i64>() {
212 Ok(AnyValue::Int64(v))
213 } else if let Ok(v) = ob.extract::<i128>() {
214 Ok(AnyValue::Int128(v))
215 } else if !strict {
216 let f = ob.extract::<f64>()?;
217 Ok(AnyValue::Float64(f))
218 } else {
219 Err(PyOverflowError::new_err(format!(
220 "int value too large for Polars integer types: {ob}"
221 )))
222 }
223 }
224
225 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
226 Ok(AnyValue::Float64(ob.extract::<f64>()?))
227 }
228
229 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
230 Ok(AnyValue::StringOwned(ob.extract::<String>()?.into()))
242 }
243
244 fn get_bytes<'py>(ob: &Bound<'py, PyAny>, _strict: bool) -> PyResult<AnyValue<'py>> {
245 let value = ob.extract::<Vec<u8>>()?;
246 Ok(AnyValue::BinaryOwned(value))
247 }
248
249 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
250 const UNIX_EPOCH: NaiveDate = DateTime::UNIX_EPOCH.naive_utc().date();
251 let date = ob.extract::<NaiveDate>()?;
252 let elapsed = date.signed_duration_since(UNIX_EPOCH);
253 Ok(AnyValue::Date(elapsed.num_days() as i32))
254 }
255
    /// Converts a Python datetime into a microsecond-precision datetime `AnyValue`.
    ///
    /// - Without `tzinfo`, the value is extracted as a naive datetime and returned as
    ///   `AnyValue::Datetime` with no time zone.
    /// - With `tzinfo`, the UTC timestamp is computed and returned as
    ///   `AnyValue::DatetimeOwned` together with a time zone name: the zone's own name when
    ///   the tzinfo object has a `key` attribute, otherwise `"UTC"`.
    ///
    /// # Panics
    /// Panics if the microsecond difference to the Unix epoch does not fit in an `i64`.
    fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
        let py = ob.py();
        let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;

        // Naive datetime: microseconds since the Unix epoch, no time zone attached.
        if tzinfo.is_none() {
            let datetime = ob.extract::<NaiveDateTime>()?;
            let delta = datetime - DateTime::UNIX_EPOCH.naive_utc();
            let timestamp = delta.num_microseconds().unwrap();
            return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
        }

        // If the tzinfo exposes a `zone` string that parses as a `Tz` (presumably
        // `pytz`-style time zones — confirm), re-express the datetime in the Python time
        // zone object produced from that `Tz` via `astimezone`. Otherwise keep both as-is.
        // Any failure along the `zone` lookup is swallowed and treated as "no match".
        let (ob, tzinfo) = if let Some(tz) = tzinfo
            .getattr(intern!(py, "zone"))
            .ok()
            .and_then(|zone| zone.extract::<PyBackedStr>().ok()?.parse::<Tz>().ok())
        {
            let tzinfo = tz.into_pyobject(py)?;
            (
                // The borrowed result of `astimezone` shadows `ob` for the rest of the function.
                &ob.call_method(intern!(py, "astimezone"), (&tzinfo,), None)?,
                tzinfo,
            )
        } else {
            (ob, tzinfo)
        };

        // A `key` attribute marks a named time zone (presumably `zoneinfo.ZoneInfo` —
        // confirm); anything else is handled as a fixed offset.
        let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
            let datetime = ob.extract::<DateTime<Tz>>()?;
            let tz = datetime.timezone().name().into();
            if datetime.year() >= 2100 {
                // NOTE(review): for years >= 2100 the timestamp is computed by the Python
                // helper `datetime_to_int` instead of chrono; presumably this works around
                // a chrono-tz limitation — confirm.
                (
                    pl_utils(py)
                        .bind(py)
                        .getattr(intern!(py, "datetime_to_int"))?
                        .call1((ob, intern!(py, "us")))?
                        .extract::<i64>()?,
                    tz,
                )
            } else {
                let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
                (delta.num_microseconds().unwrap(), tz)
            }
        } else {
            // Fixed offset: the timestamp is converted to UTC and labelled "UTC".
            let datetime = ob.extract::<DateTime<FixedOffset>>()?;
            let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
            (delta.num_microseconds().unwrap(), "UTC".into())
        };

        Ok(AnyValue::DatetimeOwned(
            timestamp,
            TimeUnit::Microseconds,
            Some(Arc::new(tz)),
        ))
    }
312
313 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
314 let timedelta = ob.extract::<TimeDelta>()?;
315 if let Some(micros) = timedelta.num_microseconds() {
316 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
317 } else {
318 Ok(AnyValue::Duration(
319 timedelta.num_milliseconds(),
320 TimeUnit::Milliseconds,
321 ))
322 }
323 }
324
325 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
326 let time = ob.extract::<NaiveTime>()?;
327
328 Ok(AnyValue::Time(
329 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
330 ))
331 }
332
333 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
334 fn abs_decimal_from_digits(
335 digits: impl IntoIterator<Item = u8>,
336 exp: i32,
337 ) -> Option<(i128, usize)> {
338 const MAX_ABS_DEC: i128 = 10_i128.pow(38) - 1;
339 let mut v = 0_i128;
340 for (i, d) in digits.into_iter().map(i128::from).enumerate() {
341 if i < 38 {
342 v = v * 10 + d;
343 } else {
344 v = v.checked_mul(10).and_then(|v| v.checked_add(d))?;
345 }
346 }
347 let scale = if exp > 0 {
349 v = 10_i128
351 .checked_pow(exp as u32)
352 .and_then(|factor| v.checked_mul(factor))?;
353 0
354 } else {
355 (-exp) as usize
356 };
357 (v <= MAX_ABS_DEC).then_some((v, scale))
359 }
360
361 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
363 .call_method0(intern!(ob.py(), "as_tuple"))
364 .unwrap()
365 .extract()
366 .unwrap();
367 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
368 PyErr::from(PyPolarsErr::Other(
369 "Decimal is too large to fit in Decimal128".into(),
370 ))
371 })?;
372 if sign > 0 {
373 v = -v; }
375 Ok(AnyValue::Decimal(v, scale))
376 }
377
378 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
379 fn get_list_with_constructor(
380 ob: &Bound<'_, PyAny>,
381 strict: bool,
382 ) -> PyResult<AnyValue<'static>> {
383 let py = ob.py();
387 let kwargs = PyDict::new(py);
388 kwargs.set_item("strict", strict)?;
389 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
390 get_list_from_series(s.bind(py), strict)
391 }
392
393 if ob.is_empty()? {
394 Ok(AnyValue::List(Series::new_empty(
395 PlSmallStr::EMPTY,
396 &DataType::Null,
397 )))
398 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
399 const INFER_SCHEMA_LENGTH: usize = 25;
400
401 let list = ob.downcast::<PySequence>()?;
402
403 let mut avs = Vec::with_capacity(INFER_SCHEMA_LENGTH);
404 let mut iter = list.try_iter()?;
405 let mut items = Vec::with_capacity(INFER_SCHEMA_LENGTH);
406 for item in (&mut iter).take(INFER_SCHEMA_LENGTH) {
407 items.push(item?);
408 let av = py_object_to_any_value(items.last().unwrap(), strict, true)?;
409 avs.push(av)
410 }
411 let (dtype, n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
412 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
413
414 if dtype.is_primitive() && n_dtypes == 1 {
416 get_list_with_constructor(ob, strict)
417 } else {
418 let length = list.len()?;
420 avs.reserve(length);
421 let mut rest = Vec::with_capacity(length);
422 for item in iter {
423 rest.push(item?);
424 let av = py_object_to_any_value(rest.last().unwrap(), strict, true)?;
425 avs.push(av)
426 }
427
428 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
429 .map_err(|e| {
430 PyTypeError::new_err(format!(
431 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
432 ))
433 })?;
434 Ok(AnyValue::List(s))
435 }
436 } else {
437 get_list_with_constructor(ob, strict)
439 }
440 }
441
442 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
443 let s = super::get_series(ob)?;
444 Ok(AnyValue::List(s))
445 }
446
447 fn get_struct<'py>(ob: &Bound<'py, PyAny>, strict: bool) -> PyResult<AnyValue<'py>> {
448 let dict = ob.downcast::<PyDict>().unwrap();
449 let len = dict.len();
450 let mut keys = Vec::with_capacity(len);
451 let mut vals = Vec::with_capacity(len);
452 for (k, v) in dict.into_iter() {
453 let key = k.extract::<Cow<str>>()?;
454 let val = py_object_to_any_value(&v, strict, true)?;
455 let dtype = val.dtype();
456 keys.push(Field::new(key.as_ref().into(), dtype));
457 vals.push(val)
458 }
459 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
460 }
461
462 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
463 #[cfg(feature = "object")]
464 {
465 let v = &ObjectValue {
467 inner: ob.clone().unbind(),
468 };
469 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
470 }
471 #[cfg(not(feature = "object"))]
472 panic!("activate object")
473 }
474
475 fn get_conversion_function(ob: &Bound<'_, PyAny>, allow_object: bool) -> PyResult<InitFn> {
480 let py = ob.py();
481 if ob.is_none() {
482 Ok(get_null)
483 }
484 else if ob.is_instance_of::<PyBool>() {
486 Ok(get_bool)
487 } else if ob.is_instance_of::<PyInt>() {
488 Ok(get_int)
489 } else if ob.is_instance_of::<PyFloat>() {
490 Ok(get_float)
491 } else if ob.is_instance_of::<PyString>() {
492 Ok(get_str)
493 } else if ob.is_instance_of::<PyBytes>() {
494 Ok(get_bytes)
495 } else if ob.is_instance_of::<PyList>() || ob.is_instance_of::<PyTuple>() {
496 Ok(get_list)
497 } else if ob.is_instance_of::<PyDict>() {
498 Ok(get_struct)
499 } else {
500 let ob_type = ob.get_type();
501 let type_name = ob_type.fully_qualified_name()?.to_string();
502 match type_name.as_str() {
503 "datetime.date" => Ok(get_date as InitFn),
506 "datetime.time" => Ok(get_time as InitFn),
507 "datetime.datetime" => Ok(get_datetime as InitFn),
508 "datetime.timedelta" => Ok(get_timedelta as InitFn),
509 "decimal.Decimal" => Ok(get_decimal as InitFn),
510 "range" => Ok(get_list as InitFn),
511 _ => {
512 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
514 return Ok(get_int as InitFn);
515 } else if ob.extract::<f64>().is_ok() {
516 return Ok(get_float as InitFn);
517 }
518
519 let ancestors = ob_type.getattr(intern!(py, "__mro__"))?;
521 let ancestors_str_iter = ancestors
522 .try_iter()?
523 .map(|b| b.unwrap().str().unwrap().to_string());
524 for c in ancestors_str_iter {
525 match &*c {
526 "<class 'datetime.datetime'>" => {
529 return Ok(get_datetime as InitFn);
530 },
531 "<class 'datetime.date'>" => return Ok(get_date as InitFn),
532 "<class 'datetime.timedelta'>" => return Ok(get_timedelta as InitFn),
533 "<class 'datetime.time'>" => return Ok(get_time as InitFn),
534 _ => (),
535 }
536 }
537
538 if allow_object {
539 Ok(get_object as InitFn)
540 } else {
541 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
542 }
543 },
544 }
545 }
546 }
547
    // Conversion functions are cached per Python type; the cache is keyed by the address of
    // the type object.
    let py_type = ob.get_type();
    let py_type_address = py_type.as_ptr() as usize;

    let conversion_func = {
        // NOTE(review): the guard created by this first `lock()` must be released before the
        // `else` branch locks `LUT` again. Presumably this relies on edition-2024 scoping of
        // `if let` temporaries — confirm the crate edition.
        if let Some(cached_func) = LUT.lock().unwrap().get(&py_type_address) {
            *cached_func
        } else {
            // Cache miss: build a key that owns a reference to the type object.
            let k = TypeObjectKey::new(py_type.clone().unbind());
            assert_eq!(k.address, py_type_address);

            // An error returns early through `?`, so failed selections are never cached.
            // NOTE(review): the cache key does not include `allow_object`, so a function
            // cached by a call with `allow_object = true` is reused by later calls that
            // pass `false` for the same type — confirm this is intended.
            let func = get_conversion_function(ob, allow_object)?;
            LUT.lock().unwrap().insert(k, func);
            func
        }
    };

    conversion_func(ob, strict)
565}