1use std::borrow::{Borrow, Cow};
2use std::sync::{Arc, Mutex};
3
4use chrono::{
5 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
6};
7use chrono_tz::Tz;
8use hashbrown::HashMap;
9#[cfg(feature = "object")]
10use polars::chunked_array::object::PolarsObjectSafe;
11#[cfg(feature = "object")]
12use polars::datatypes::OwnedObject;
13use polars::datatypes::{DataType, Field, TimeUnit};
14use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone};
15use polars_core::utils::any_values_to_supertype_and_n_dtypes;
16use polars_core::utils::arrow::temporal_conversions::date32_to_date;
17use polars_utils::aliases::PlFixedStateQuality;
18use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
19use pyo3::prelude::*;
20use pyo3::pybacked::PyBackedStr;
21use pyo3::types::{
22 PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PyMapping, PySequence, PyString, PyTuple,
23 PyType,
24};
25use pyo3::{IntoPyObjectExt, PyTypeCheck, intern};
26
27use super::datetime::{
28 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
29};
30use super::{ObjectValue, Wrap, decimal_to_digits, struct_dict};
31use crate::error::PyPolarsErr;
32use crate::py_modules::{pl_series, pl_utils};
33use crate::series::PySeries;
34
35impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
36 type Target = PyAny;
37 type Output = Bound<'py, Self::Target>;
38 type Error = PyErr;
39
40 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
41 any_value_into_py_object(self.0, py)
42 }
43}
44
45impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
46 type Target = PyAny;
47 type Output = Bound<'py, Self::Target>;
48 type Error = PyErr;
49
50 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
51 self.clone().into_pyobject(py)
52 }
53}
54
55impl<'py> FromPyObject<'py> for Wrap<AnyValue<'static>> {
56 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
57 py_object_to_any_value(ob, true, true).map(Wrap)
58 }
59}
60
61pub(crate) fn any_value_into_py_object<'py>(
62 av: AnyValue<'_>,
63 py: Python<'py>,
64) -> PyResult<Bound<'py, PyAny>> {
65 let utils = pl_utils(py).bind(py);
66 match av {
67 AnyValue::UInt8(v) => v.into_bound_py_any(py),
68 AnyValue::UInt16(v) => v.into_bound_py_any(py),
69 AnyValue::UInt32(v) => v.into_bound_py_any(py),
70 AnyValue::UInt64(v) => v.into_bound_py_any(py),
71 AnyValue::Int8(v) => v.into_bound_py_any(py),
72 AnyValue::Int16(v) => v.into_bound_py_any(py),
73 AnyValue::Int32(v) => v.into_bound_py_any(py),
74 AnyValue::Int64(v) => v.into_bound_py_any(py),
75 AnyValue::Int128(v) => v.into_bound_py_any(py),
76 AnyValue::Float32(v) => v.into_bound_py_any(py),
77 AnyValue::Float64(v) => v.into_bound_py_any(py),
78 AnyValue::Null => py.None().into_bound_py_any(py),
79 AnyValue::Boolean(v) => v.into_bound_py_any(py),
80 AnyValue::String(v) => v.into_bound_py_any(py),
81 AnyValue::StringOwned(v) => v.into_bound_py_any(py),
82 AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
83 let s = if arr.is_null() {
84 rev.get(idx)
85 } else {
86 unsafe { arr.deref_unchecked().value(idx as usize) }
87 };
88 s.into_bound_py_any(py)
89 },
90 AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
91 let s = if arr.is_null() {
92 rev.get(idx)
93 } else {
94 unsafe { arr.deref_unchecked().value(idx as usize) }
95 };
96 s.into_bound_py_any(py)
97 },
98 AnyValue::Date(v) => {
99 let date = date32_to_date(v);
100 date.into_bound_py_any(py)
101 },
102 AnyValue::Datetime(v, time_unit, time_zone) => {
103 datetime_to_py_object(py, v, time_unit, time_zone)
104 },
105 AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
106 datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
107 },
108 AnyValue::Duration(v, time_unit) => {
109 let time_delta = elapsed_offset_to_timedelta(v, time_unit);
110 time_delta.into_bound_py_any(py)
111 },
112 AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
113 AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
114 ref av @ AnyValue::Struct(_, _, flds) => {
115 Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
116 },
117 AnyValue::StructOwned(payload) => {
118 Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
119 },
120 #[cfg(feature = "object")]
121 AnyValue::Object(v) => {
122 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
123 Ok(object.inner.clone_ref(py).into_bound(py))
124 },
125 #[cfg(feature = "object")]
126 AnyValue::ObjectOwned(v) => {
127 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
128 Ok(object.inner.clone_ref(py).into_bound(py))
129 },
130 AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
131 AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
132 AnyValue::Decimal(v, scale) => {
133 let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
134 const N: usize = 3;
135 let mut buf = [0_u128; N];
136 let n_digits = decimal_to_digits(v.abs(), &mut buf);
137 let buf = unsafe {
138 std::slice::from_raw_parts(
139 buf.as_slice().as_ptr() as *const u8,
140 N * size_of::<u128>(),
141 )
142 };
143 let digits = PyTuple::new(py, buf.iter().take(n_digits))?;
144 convert.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32)))
145 },
146 }
147}
148
149#[derive(Debug)]
153pub struct TypeObjectKey {
154 #[allow(unused)]
155 type_object: Py<PyType>,
156 address: usize,
158}
159
160impl TypeObjectKey {
161 fn new(type_object: Py<PyType>) -> Self {
162 let address = type_object.as_ptr() as usize;
163 Self {
164 type_object,
165 address,
166 }
167 }
168}
169
170impl PartialEq for TypeObjectKey {
171 fn eq(&self, other: &Self) -> bool {
172 self.address == other.address
173 }
174}
175
176impl Eq for TypeObjectKey {}
177
178impl std::borrow::Borrow<usize> for TypeObjectKey {
179 fn borrow(&self) -> &usize {
180 &self.address
181 }
182}
183
184impl std::hash::Hash for TypeObjectKey {
185 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
186 let v: &usize = self.borrow();
187 v.hash(state)
188 }
189}
190
191type InitFn = fn(&Bound<'_, PyAny>, bool) -> PyResult<AnyValue<'static>>;
192pub(crate) static LUT: Mutex<HashMap<TypeObjectKey, InitFn, PlFixedStateQuality>> =
193 Mutex::new(HashMap::with_hasher(PlFixedStateQuality::with_seed(0)));
194
195pub(crate) fn py_object_to_any_value(
197 ob: &Bound<'_, PyAny>,
198 strict: bool,
199 allow_object: bool,
200) -> PyResult<AnyValue<'static>> {
201 fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
203 Ok(AnyValue::Null)
204 }
205
206 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
207 let b = ob.extract::<bool>()?;
208 Ok(AnyValue::Boolean(b))
209 }
210
211 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
212 if let Ok(v) = ob.extract::<i64>() {
213 Ok(AnyValue::Int64(v))
214 } else if let Ok(v) = ob.extract::<i128>() {
215 Ok(AnyValue::Int128(v))
216 } else if !strict {
217 let f = ob.extract::<f64>()?;
218 Ok(AnyValue::Float64(f))
219 } else {
220 Err(PyOverflowError::new_err(format!(
221 "int value too large for Polars integer types: {ob}"
222 )))
223 }
224 }
225
226 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
227 Ok(AnyValue::Float64(ob.extract::<f64>()?))
228 }
229
230 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
231 Ok(AnyValue::StringOwned(ob.extract::<String>()?.into()))
243 }
244
245 fn get_bytes(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
246 let value = ob.extract::<Vec<u8>>()?;
247 Ok(AnyValue::BinaryOwned(value))
248 }
249
250 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
251 const UNIX_EPOCH: NaiveDate = DateTime::UNIX_EPOCH.naive_utc().date();
252 let date = ob.extract::<NaiveDate>()?;
253 let elapsed = date.signed_duration_since(UNIX_EPOCH);
254 Ok(AnyValue::Date(elapsed.num_days() as i32))
255 }
256
257 fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
258 let py = ob.py();
259 let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
260
261 if tzinfo.is_none() {
262 let datetime = ob.extract::<NaiveDateTime>()?;
263 let delta = datetime - DateTime::UNIX_EPOCH.naive_utc();
264 let timestamp = delta.num_microseconds().unwrap();
265 return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
266 }
267
268 let (ob, tzinfo) = if let Some(tz) = tzinfo
270 .getattr(intern!(py, "zone"))
271 .ok()
272 .and_then(|zone| zone.extract::<PyBackedStr>().ok()?.parse::<Tz>().ok())
273 {
274 let tzinfo = tz.into_pyobject(py)?;
275 (
276 &ob.call_method(intern!(py, "astimezone"), (&tzinfo,), None)?,
277 tzinfo,
278 )
279 } else {
280 (ob, tzinfo)
281 };
282
283 let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
284 let datetime = ob.extract::<DateTime<Tz>>()?;
285 let tz = unsafe { TimeZone::from_static(datetime.timezone().name()) };
286 if datetime.year() >= 2100 {
287 (
290 pl_utils(py)
291 .bind(py)
292 .getattr(intern!(py, "datetime_to_int"))?
293 .call1((ob, intern!(py, "us")))?
294 .extract::<i64>()?,
295 tz,
296 )
297 } else {
298 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
299 (delta.num_microseconds().unwrap(), tz)
300 }
301 } else {
302 let datetime = ob.extract::<DateTime<FixedOffset>>()?;
303 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
304 (delta.num_microseconds().unwrap(), TimeZone::UTC)
305 };
306
307 Ok(AnyValue::DatetimeOwned(
308 timestamp,
309 TimeUnit::Microseconds,
310 Some(Arc::new(tz)),
311 ))
312 }
313
314 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
315 let timedelta = ob.extract::<TimeDelta>()?;
316 if let Some(micros) = timedelta.num_microseconds() {
317 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
318 } else {
319 Ok(AnyValue::Duration(
320 timedelta.num_milliseconds(),
321 TimeUnit::Milliseconds,
322 ))
323 }
324 }
325
326 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
327 let time = ob.extract::<NaiveTime>()?;
328
329 Ok(AnyValue::Time(
330 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
331 ))
332 }
333
334 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
335 fn abs_decimal_from_digits(
336 digits: impl IntoIterator<Item = u8>,
337 exp: i32,
338 ) -> Option<(i128, usize)> {
339 const MAX_ABS_DEC: i128 = 10_i128.pow(38) - 1;
340 let mut v = 0_i128;
341 for (i, d) in digits.into_iter().map(i128::from).enumerate() {
342 if i < 38 {
343 v = v * 10 + d;
344 } else {
345 v = v.checked_mul(10).and_then(|v| v.checked_add(d))?;
346 }
347 }
348 let scale = if exp > 0 {
350 v = 10_i128
352 .checked_pow(exp as u32)
353 .and_then(|factor| v.checked_mul(factor))?;
354 0
355 } else {
356 (-exp) as usize
357 };
358 (v <= MAX_ABS_DEC).then_some((v, scale))
360 }
361
362 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
364 .call_method0(intern!(ob.py(), "as_tuple"))
365 .unwrap()
366 .extract()
367 .unwrap();
368 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
369 PyErr::from(PyPolarsErr::Other(
370 "Decimal is too large to fit in Decimal128".into(),
371 ))
372 })?;
373 if sign > 0 {
374 v = -v; }
376 Ok(AnyValue::Decimal(v, scale))
377 }
378
379 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
380 fn get_list_with_constructor(
381 ob: &Bound<'_, PyAny>,
382 strict: bool,
383 ) -> PyResult<AnyValue<'static>> {
384 let py = ob.py();
388 let kwargs = PyDict::new(py);
389 kwargs.set_item("strict", strict)?;
390 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
391 get_list_from_series(s.bind(py), strict)
392 }
393
394 if ob.is_empty()? {
395 Ok(AnyValue::List(Series::new_empty(
396 PlSmallStr::EMPTY,
397 &DataType::Null,
398 )))
399 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
400 const INFER_SCHEMA_LENGTH: usize = 25;
401
402 let list = ob.downcast::<PySequence>()?;
403
404 let mut avs = Vec::with_capacity(INFER_SCHEMA_LENGTH);
405 let mut iter = list.try_iter()?;
406 let mut items = Vec::with_capacity(INFER_SCHEMA_LENGTH);
407 for item in (&mut iter).take(INFER_SCHEMA_LENGTH) {
408 items.push(item?);
409 let av = py_object_to_any_value(items.last().unwrap(), strict, true)?;
410 avs.push(av)
411 }
412 let (dtype, n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
413 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
414
415 if dtype.is_primitive() && n_dtypes == 1 {
417 get_list_with_constructor(ob, strict)
418 } else {
419 let length = list.len()?;
421 avs.reserve(length);
422 let mut rest = Vec::with_capacity(length);
423 for item in iter {
424 rest.push(item?);
425 let av = py_object_to_any_value(rest.last().unwrap(), strict, true)?;
426 avs.push(av)
427 }
428
429 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
430 .map_err(|e| {
431 PyTypeError::new_err(format!(
432 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
433 ))
434 })?;
435 Ok(AnyValue::List(s))
436 }
437 } else {
438 get_list_with_constructor(ob, strict)
440 }
441 }
442
443 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
444 let s = super::get_series(ob)?;
445 Ok(AnyValue::List(s))
446 }
447
448 fn get_mapping(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
449 let mapping = ob.downcast::<PyMapping>()?;
450 let len = mapping.len()?;
451 let mut keys = Vec::with_capacity(len);
452 let mut vals = Vec::with_capacity(len);
453
454 for item in mapping.items()?.try_iter()? {
455 let item = item?.downcast_into::<PyTuple>()?;
456 let (key_py, val_py) = (item.get_item(0)?, item.get_item(1)?);
457
458 let key: Cow<str> = key_py.extract()?;
459 let val = py_object_to_any_value(&val_py, strict, true)?;
460
461 keys.push(Field::new(key.as_ref().into(), val.dtype()));
462 vals.push(val);
463 }
464 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
465 }
466
467 fn get_struct(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
468 let dict = ob.downcast::<PyDict>().unwrap();
469 let len = dict.len();
470 let mut keys = Vec::with_capacity(len);
471 let mut vals = Vec::with_capacity(len);
472 for (k, v) in dict.into_iter() {
473 let key = k.extract::<Cow<str>>()?;
474 let val = py_object_to_any_value(&v, strict, true)?;
475 let dtype = val.dtype();
476 keys.push(Field::new(key.as_ref().into(), dtype));
477 vals.push(val)
478 }
479 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
480 }
481
482 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
483 #[cfg(feature = "object")]
484 {
485 let v = &ObjectValue {
487 inner: ob.clone().unbind(),
488 };
489 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
490 }
491 #[cfg(not(feature = "object"))]
492 panic!("activate object")
493 }
494
495 fn get_conversion_function(ob: &Bound<'_, PyAny>, allow_object: bool) -> PyResult<InitFn> {
500 let py = ob.py();
501 if ob.is_none() {
502 Ok(get_null)
503 }
504 else if ob.is_instance_of::<PyBool>() {
506 Ok(get_bool)
507 } else if ob.is_instance_of::<PyInt>() {
508 Ok(get_int)
509 } else if ob.is_instance_of::<PyFloat>() {
510 Ok(get_float)
511 } else if ob.is_instance_of::<PyString>() {
512 Ok(get_str)
513 } else if ob.is_instance_of::<PyBytes>() {
514 Ok(get_bytes)
515 } else if ob.is_instance_of::<PyList>() || ob.is_instance_of::<PyTuple>() {
516 Ok(get_list)
517 } else if ob.is_instance_of::<PyDict>() {
518 Ok(get_struct)
519 } else if PyMapping::type_check(ob) {
520 Ok(get_mapping)
521 } else {
522 let ob_type = ob.get_type();
523 let type_name = ob_type.fully_qualified_name()?.to_string();
524 match type_name.as_str() {
525 "datetime.date" => Ok(get_date as InitFn),
528 "datetime.time" => Ok(get_time as InitFn),
529 "datetime.datetime" => Ok(get_datetime as InitFn),
530 "datetime.timedelta" => Ok(get_timedelta as InitFn),
531 "decimal.Decimal" => Ok(get_decimal as InitFn),
532 "range" => Ok(get_list as InitFn),
533 _ => {
534 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
536 return Ok(get_int as InitFn);
537 } else if ob.extract::<f64>().is_ok() {
538 return Ok(get_float as InitFn);
539 }
540
541 let ancestors = ob_type.getattr(intern!(py, "__mro__"))?;
543 let ancestors_str_iter = ancestors
544 .try_iter()?
545 .map(|b| b.unwrap().str().unwrap().to_string());
546 for c in ancestors_str_iter {
547 match &*c {
548 "<class 'datetime.datetime'>" => {
551 return Ok(get_datetime as InitFn);
552 },
553 "<class 'datetime.date'>" => return Ok(get_date as InitFn),
554 "<class 'datetime.timedelta'>" => return Ok(get_timedelta as InitFn),
555 "<class 'datetime.time'>" => return Ok(get_time as InitFn),
556 _ => (),
557 }
558 }
559
560 if allow_object {
561 Ok(get_object as InitFn)
562 } else {
563 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
564 }
565 },
566 }
567 }
568 }
569
570 let py_type = ob.get_type();
571 let py_type_address = py_type.as_ptr() as usize;
572
573 let conversion_func = {
574 if let Some(cached_func) = LUT.lock().unwrap().get(&py_type_address) {
575 *cached_func
576 } else {
577 let k = TypeObjectKey::new(py_type.clone().unbind());
578 assert_eq!(k.address, py_type_address);
579
580 let func = get_conversion_function(ob, allow_object)?;
581 LUT.lock().unwrap().insert(k, func);
582 func
583 }
584 };
585
586 conversion_func(ob, strict)
587}