1use std::borrow::{Borrow, Cow};
2use std::sync::{Arc, Mutex};
3
4use chrono::{
5 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
6};
7use chrono_tz::Tz;
8use hashbrown::HashMap;
9#[cfg(feature = "object")]
10use polars::chunked_array::object::PolarsObjectSafe;
11#[cfg(feature = "object")]
12use polars::datatypes::OwnedObject;
13use polars::datatypes::{DataType, Field, TimeUnit};
14use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone};
15use polars_core::utils::any_values_to_supertype_and_n_dtypes;
16use polars_core::utils::arrow::temporal_conversions::date32_to_date;
17use polars_utils::aliases::PlFixedStateQuality;
18use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
19use pyo3::prelude::*;
20use pyo3::sync::GILOnceCell;
21use pyo3::types::{
22 PyBool, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyFloat, PyInt, PyList, PyMapping,
23 PyRange, PySequence, PyString, PyTime, PyTuple, PyType, PyTzInfo,
24};
25use pyo3::{IntoPyObjectExt, PyTypeCheck, intern};
26
27use super::datetime::{
28 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
29};
30use super::{ObjectValue, Wrap, decimal_to_digits, struct_dict};
31use crate::error::PyPolarsErr;
32use crate::py_modules::{pl_series, pl_utils};
33use crate::series::PySeries;
34
35impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
36 type Target = PyAny;
37 type Output = Bound<'py, Self::Target>;
38 type Error = PyErr;
39
40 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
41 any_value_into_py_object(self.0, py)
42 }
43}
44
45impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
46 type Target = PyAny;
47 type Output = Bound<'py, Self::Target>;
48 type Error = PyErr;
49
50 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
51 self.clone().into_pyobject(py)
52 }
53}
54
55impl<'py> FromPyObject<'py> for Wrap<AnyValue<'static>> {
56 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
57 py_object_to_any_value(ob, true, true).map(Wrap)
58 }
59}
60
61pub(crate) fn any_value_into_py_object<'py>(
62 av: AnyValue<'_>,
63 py: Python<'py>,
64) -> PyResult<Bound<'py, PyAny>> {
65 let utils = pl_utils(py).bind(py);
66 match av {
67 AnyValue::UInt8(v) => v.into_bound_py_any(py),
68 AnyValue::UInt16(v) => v.into_bound_py_any(py),
69 AnyValue::UInt32(v) => v.into_bound_py_any(py),
70 AnyValue::UInt64(v) => v.into_bound_py_any(py),
71 AnyValue::Int8(v) => v.into_bound_py_any(py),
72 AnyValue::Int16(v) => v.into_bound_py_any(py),
73 AnyValue::Int32(v) => v.into_bound_py_any(py),
74 AnyValue::Int64(v) => v.into_bound_py_any(py),
75 AnyValue::Int128(v) => v.into_bound_py_any(py),
76 AnyValue::Float32(v) => v.into_bound_py_any(py),
77 AnyValue::Float64(v) => v.into_bound_py_any(py),
78 AnyValue::Null => py.None().into_bound_py_any(py),
79 AnyValue::Boolean(v) => v.into_bound_py_any(py),
80 AnyValue::String(v) => v.into_bound_py_any(py),
81 AnyValue::StringOwned(v) => v.into_bound_py_any(py),
82 AnyValue::Categorical(cat, map) | AnyValue::Enum(cat, map) => unsafe {
83 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
84 },
85 AnyValue::CategoricalOwned(cat, map) | AnyValue::EnumOwned(cat, map) => unsafe {
86 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
87 },
88 AnyValue::Date(v) => {
89 let date = date32_to_date(v);
90 date.into_bound_py_any(py)
91 },
92 AnyValue::Datetime(v, time_unit, time_zone) => {
93 datetime_to_py_object(py, v, time_unit, time_zone)
94 },
95 AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
96 datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
97 },
98 AnyValue::Duration(v, time_unit) => {
99 let time_delta = elapsed_offset_to_timedelta(v, time_unit);
100 time_delta.into_bound_py_any(py)
101 },
102 AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
103 AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
104 ref av @ AnyValue::Struct(_, _, flds) => {
105 Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
106 },
107 AnyValue::StructOwned(payload) => {
108 Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
109 },
110 #[cfg(feature = "object")]
111 AnyValue::Object(v) => {
112 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
113 Ok(object.inner.clone_ref(py).into_bound(py))
114 },
115 #[cfg(feature = "object")]
116 AnyValue::ObjectOwned(v) => {
117 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
118 Ok(object.inner.clone_ref(py).into_bound(py))
119 },
120 AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
121 AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
122 AnyValue::Decimal(v, scale) => {
123 let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
124 const N: usize = 3;
125 let mut buf = [0_u128; N];
126 let n_digits = decimal_to_digits(v.abs(), &mut buf);
127 let buf = unsafe {
128 std::slice::from_raw_parts(
129 buf.as_slice().as_ptr() as *const u8,
130 N * size_of::<u128>(),
131 )
132 };
133 let digits = PyTuple::new(py, buf.iter().take(n_digits))?;
134 convert.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32)))
135 },
136 }
137}
138
139#[derive(Debug)]
143pub struct TypeObjectKey {
144 #[allow(unused)]
145 type_object: Py<PyType>,
146 address: usize,
148}
149
150impl TypeObjectKey {
151 fn new(type_object: Py<PyType>) -> Self {
152 let address = type_object.as_ptr() as usize;
153 Self {
154 type_object,
155 address,
156 }
157 }
158}
159
160impl PartialEq for TypeObjectKey {
161 fn eq(&self, other: &Self) -> bool {
162 self.address == other.address
163 }
164}
165
166impl Eq for TypeObjectKey {}
167
168impl std::borrow::Borrow<usize> for TypeObjectKey {
169 fn borrow(&self) -> &usize {
170 &self.address
171 }
172}
173
174impl std::hash::Hash for TypeObjectKey {
175 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
176 let v: &usize = self.borrow();
177 v.hash(state)
178 }
179}
180
/// Signature shared by the per-type converters: `(object, strict)` to an owned [`AnyValue`].
type InitFn = fn(&Bound<'_, PyAny>, bool) -> PyResult<AnyValue<'static>>;
/// Global, mutex-guarded cache from a Python type object (keyed by its address
/// via [`TypeObjectKey`]) to the converter selected for that type.
pub(crate) static LUT: Mutex<HashMap<TypeObjectKey, InitFn, PlFixedStateQuality>> =
    Mutex::new(HashMap::with_hasher(PlFixedStateQuality::with_seed(0)));
184
/// Converts an arbitrary Python object into an owned Polars [`AnyValue`].
///
/// A converter is chosen from the object's Python type and cached in [`LUT`]
/// under the type object's address; the object is then passed to that
/// converter together with `strict`.
///
/// * `ob` - the Python object to convert.
/// * `strict` - forwarded to the selected converter (for example, `get_int`
///   only falls back to a float when this is `false`).
/// * `allow_object` - when no dedicated converter matches, wrap the Python
///   object itself instead of returning an error.
///
/// # Errors
/// Returns the `PyErr` produced while selecting or running the converter.
pub(crate) fn py_object_to_any_value(
    ob: &Bound<'_, PyAny>,
    strict: bool,
    allow_object: bool,
) -> PyResult<AnyValue<'static>> {
191 fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
193 Ok(AnyValue::Null)
194 }
195
196 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
197 let b = ob.extract::<bool>()?;
198 Ok(AnyValue::Boolean(b))
199 }
200
201 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
202 if let Ok(v) = ob.extract::<i64>() {
203 Ok(AnyValue::Int64(v))
204 } else if let Ok(v) = ob.extract::<i128>() {
205 Ok(AnyValue::Int128(v))
206 } else if !strict {
207 let f = ob.extract::<f64>()?;
208 Ok(AnyValue::Float64(f))
209 } else {
210 Err(PyOverflowError::new_err(format!(
211 "int value too large for Polars integer types: {ob}"
212 )))
213 }
214 }
215
216 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
217 Ok(AnyValue::Float64(ob.extract::<f64>()?))
218 }
219
220 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
221 Ok(AnyValue::StringOwned(ob.extract::<String>()?.into()))
233 }
234
235 fn get_bytes(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
236 let value = ob.extract::<Vec<u8>>()?;
237 Ok(AnyValue::BinaryOwned(value))
238 }
239
240 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
241 const UNIX_EPOCH: NaiveDate = DateTime::UNIX_EPOCH.naive_utc().date();
242 let date = ob.extract::<NaiveDate>()?;
243 let elapsed = date.signed_duration_since(UNIX_EPOCH);
244 Ok(AnyValue::Date(elapsed.num_days() as i32))
245 }
246
247 fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
248 let py = ob.py();
249 let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
250
251 if tzinfo.is_none() {
252 let datetime = ob.extract::<NaiveDateTime>()?;
253 let delta = datetime - DateTime::UNIX_EPOCH.naive_utc();
254 let timestamp = delta.num_microseconds().unwrap();
255 return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
256 }
257
258 let (ob, tzinfo) = if let Some(tz) = tzinfo
260 .getattr(intern!(py, "zone"))
261 .ok()
262 .and_then(|tz| (!tz.is_none()).then_some(tz))
263 {
264 let tzinfo = PyTzInfo::timezone(py, tz.downcast_into::<PyString>()?)?;
265 (
266 &ob.call_method(intern!(py, "astimezone"), (&tzinfo,), None)?,
267 tzinfo,
268 )
269 } else {
270 (ob, tzinfo.downcast_into()?)
271 };
272
273 let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
274 let datetime = ob.extract::<DateTime<Tz>>()?;
275 let tz = unsafe { TimeZone::from_static(datetime.timezone().name()) };
276 if datetime.year() >= 2100 {
277 (
280 pl_utils(py)
281 .bind(py)
282 .getattr(intern!(py, "datetime_to_int"))?
283 .call1((ob, intern!(py, "us")))?
284 .extract::<i64>()?,
285 tz,
286 )
287 } else {
288 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
289 (delta.num_microseconds().unwrap(), tz)
290 }
291 } else {
292 let datetime = ob.extract::<DateTime<FixedOffset>>()?;
293 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
294 (delta.num_microseconds().unwrap(), TimeZone::UTC)
295 };
296
297 Ok(AnyValue::DatetimeOwned(
298 timestamp,
299 TimeUnit::Microseconds,
300 Some(Arc::new(tz)),
301 ))
302 }
303
304 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
305 let timedelta = ob.extract::<TimeDelta>()?;
306 if let Some(micros) = timedelta.num_microseconds() {
307 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
308 } else {
309 Ok(AnyValue::Duration(
310 timedelta.num_milliseconds(),
311 TimeUnit::Milliseconds,
312 ))
313 }
314 }
315
316 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
317 let time = ob.extract::<NaiveTime>()?;
318
319 Ok(AnyValue::Time(
320 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
321 ))
322 }
323
324 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
325 fn abs_decimal_from_digits(
326 digits: impl IntoIterator<Item = u8>,
327 exp: i32,
328 ) -> Option<(i128, usize)> {
329 const MAX_ABS_DEC: i128 = 10_i128.pow(38) - 1;
330 let mut v = 0_i128;
331 for (i, d) in digits.into_iter().map(i128::from).enumerate() {
332 if i < 38 {
333 v = v * 10 + d;
334 } else {
335 v = v.checked_mul(10).and_then(|v| v.checked_add(d))?;
336 }
337 }
338 let scale = if exp > 0 {
340 v = 10_i128
342 .checked_pow(exp as u32)
343 .and_then(|factor| v.checked_mul(factor))?;
344 0
345 } else {
346 (-exp) as usize
347 };
348 (v <= MAX_ABS_DEC).then_some((v, scale))
350 }
351
352 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
354 .call_method0(intern!(ob.py(), "as_tuple"))
355 .unwrap()
356 .extract()
357 .unwrap();
358 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
359 PyErr::from(PyPolarsErr::Other(
360 "Decimal is too large to fit in Decimal128".into(),
361 ))
362 })?;
363 if sign > 0 {
364 v = -v; }
366 Ok(AnyValue::Decimal(v, scale))
367 }
368
369 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
370 fn get_list_with_constructor(
371 ob: &Bound<'_, PyAny>,
372 strict: bool,
373 ) -> PyResult<AnyValue<'static>> {
374 let py = ob.py();
378 let kwargs = PyDict::new(py);
379 kwargs.set_item("strict", strict)?;
380 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
381 get_list_from_series(s.bind(py), strict)
382 }
383
384 if ob.is_empty()? {
385 Ok(AnyValue::List(Series::new_empty(
386 PlSmallStr::EMPTY,
387 &DataType::Null,
388 )))
389 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
390 let list = ob.downcast::<PySequence>()?;
391
392 let length = list.len()?;
394 let mut iter = list.try_iter()?;
395 let mut avs = Vec::new();
396 for item in &mut iter {
397 let av = py_object_to_any_value(&item?, strict, true)?;
398 let is_null = av.is_null();
399 avs.push(av);
400 if is_null {
401 break;
402 }
403 }
404
405 if let Some(av) = avs.last()
407 && !av.is_null()
408 && av.dtype().is_primitive()
409 {
410 match get_list_with_constructor(ob, true) {
413 Ok(ret) => return Ok(ret),
414 Err(e) => {
415 if strict {
416 return Err(e);
417 }
418 },
419 }
420 }
421
422 avs.reserve(length);
424 for item in &mut iter {
425 avs.push(py_object_to_any_value(&item?, strict, true)?);
426 }
427
428 let (dtype, _n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
429 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
430 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
431 .map_err(|e| {
432 PyTypeError::new_err(format!(
433 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
434 ))
435 })?;
436 Ok(AnyValue::List(s))
437 } else {
438 get_list_with_constructor(ob, strict)
440 }
441 }
442
443 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
444 let s = super::get_series(ob)?;
445 Ok(AnyValue::List(s))
446 }
447
448 fn get_mapping(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
449 let mapping = ob.downcast::<PyMapping>()?;
450 let len = mapping.len()?;
451 let mut keys = Vec::with_capacity(len);
452 let mut vals = Vec::with_capacity(len);
453
454 for item in mapping.items()?.try_iter()? {
455 let item = item?.downcast_into::<PyTuple>()?;
456 let (key_py, val_py) = (item.get_item(0)?, item.get_item(1)?);
457
458 let key: Cow<str> = key_py.extract()?;
459 let val = py_object_to_any_value(&val_py, strict, true)?;
460
461 keys.push(Field::new(key.as_ref().into(), val.dtype()));
462 vals.push(val);
463 }
464 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
465 }
466
467 fn get_struct(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
468 let dict = ob.downcast::<PyDict>().unwrap();
469 let len = dict.len();
470 let mut keys = Vec::with_capacity(len);
471 let mut vals = Vec::with_capacity(len);
472 for (k, v) in dict.into_iter() {
473 let key = k.extract::<Cow<str>>()?;
474 let val = py_object_to_any_value(&v, strict, true)?;
475 let dtype = val.dtype();
476 keys.push(Field::new(key.as_ref().into(), dtype));
477 vals.push(val)
478 }
479 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
480 }
481
482 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
483 #[cfg(feature = "object")]
484 {
485 let v = &ObjectValue {
487 inner: ob.clone().unbind(),
488 };
489 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
490 }
491 #[cfg(not(feature = "object"))]
492 panic!("activate object")
493 }
494
495 fn get_conversion_function(ob: &Bound<'_, PyAny>, allow_object: bool) -> PyResult<InitFn> {
500 let py = ob.py();
501 if ob.is_none() {
502 Ok(get_null)
503 }
504 else if ob.is_instance_of::<PyBool>() {
506 Ok(get_bool)
507 } else if ob.is_instance_of::<PyInt>() {
508 Ok(get_int)
509 } else if ob.is_instance_of::<PyFloat>() {
510 Ok(get_float)
511 } else if ob.is_instance_of::<PyString>() {
512 Ok(get_str)
513 } else if ob.is_instance_of::<PyBytes>() {
514 Ok(get_bytes)
515 } else if ob.is_instance_of::<PyList>() || ob.is_instance_of::<PyTuple>() {
516 Ok(get_list)
517 } else if ob.is_instance_of::<PyDict>() {
518 Ok(get_struct)
519 } else if PyMapping::type_check(ob) {
520 Ok(get_mapping)
521 }
522 else if PyDateTime::type_check(ob) {
525 Ok(get_datetime as InitFn)
526 } else if PyDate::type_check(ob) {
527 Ok(get_date as InitFn)
528 } else if PyTime::type_check(ob) {
529 Ok(get_time as InitFn)
530 } else if PyDelta::type_check(ob) {
531 Ok(get_timedelta as InitFn)
532 } else if ob.is_instance_of::<PyRange>() {
533 Ok(get_list as InitFn)
534 } else {
535 static DECIMAL_TYPE: GILOnceCell<Py<PyType>> = GILOnceCell::new();
536 if ob.is_instance(DECIMAL_TYPE.import(py, "decimal", "Decimal")?)? {
537 return Ok(get_decimal as InitFn);
538 }
539
540 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
542 return Ok(get_int as InitFn);
543 } else if ob.extract::<f64>().is_ok() {
544 return Ok(get_float as InitFn);
545 }
546
547 if allow_object {
548 Ok(get_object as InitFn)
549 } else {
550 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
551 }
552 }
553 }
554
555 let py_type = ob.get_type();
556 let py_type_address = py_type.as_ptr() as usize;
557
558 let conversion_func = {
559 if let Some(cached_func) = LUT.lock().unwrap().get(&py_type_address) {
560 *cached_func
561 } else {
562 let k = TypeObjectKey::new(py_type.clone().unbind());
563 assert_eq!(k.address, py_type_address);
564
565 let func = get_conversion_function(ob, allow_object)?;
566 LUT.lock().unwrap().insert(k, func);
567 func
568 }
569 };
570
571 conversion_func(ob, strict)
572}