1use std::borrow::{Borrow, Cow};
2use std::sync::{Arc, Mutex};
3
4use chrono::{
5 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
6};
7use chrono_tz::Tz;
8use hashbrown::HashMap;
9#[cfg(feature = "object")]
10use polars::chunked_array::object::PolarsObjectSafe;
11#[cfg(feature = "object")]
12use polars::datatypes::OwnedObject;
13use polars::datatypes::{DataType, Field, TimeUnit};
14use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone};
15use polars_compute::decimal::{DEC128_MAX_PREC, DecimalFmtBuffer, dec128_fits};
16use polars_core::utils::any_values_to_supertype_and_n_dtypes;
17use polars_core::utils::arrow::temporal_conversions::date32_to_date;
18use polars_utils::aliases::PlFixedStateQuality;
19use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
20use pyo3::prelude::*;
21use pyo3::sync::PyOnceLock;
22use pyo3::types::{
23 PyBool, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyFloat, PyInt, PyList, PyMapping,
24 PyRange, PySequence, PyString, PyTime, PyTuple, PyType, PyTzInfo,
25};
26use pyo3::{IntoPyObjectExt, PyTypeCheck, intern};
27
28use super::datetime::{
29 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
30};
31use super::{ObjectValue, Wrap, struct_dict};
32use crate::error::PyPolarsErr;
33use crate::py_modules::{pl_series, pl_utils};
34use crate::series::PySeries;
35
36impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
37 type Target = PyAny;
38 type Output = Bound<'py, Self::Target>;
39 type Error = PyErr;
40
41 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
42 any_value_into_py_object(self.0, py)
43 }
44}
45
46impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
47 type Target = PyAny;
48 type Output = Bound<'py, Self::Target>;
49 type Error = PyErr;
50
51 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
52 self.clone().into_pyobject(py)
53 }
54}
55
56impl<'py> FromPyObject<'py> for Wrap<AnyValue<'static>> {
57 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
58 py_object_to_any_value(ob, true, true).map(Wrap)
59 }
60}
61
62pub(crate) fn any_value_into_py_object<'py>(
63 av: AnyValue<'_>,
64 py: Python<'py>,
65) -> PyResult<Bound<'py, PyAny>> {
66 let utils = pl_utils(py).bind(py);
67 match av {
68 AnyValue::UInt8(v) => v.into_bound_py_any(py),
69 AnyValue::UInt16(v) => v.into_bound_py_any(py),
70 AnyValue::UInt32(v) => v.into_bound_py_any(py),
71 AnyValue::UInt64(v) => v.into_bound_py_any(py),
72 AnyValue::UInt128(v) => v.into_bound_py_any(py),
73 AnyValue::Int8(v) => v.into_bound_py_any(py),
74 AnyValue::Int16(v) => v.into_bound_py_any(py),
75 AnyValue::Int32(v) => v.into_bound_py_any(py),
76 AnyValue::Int64(v) => v.into_bound_py_any(py),
77 AnyValue::Int128(v) => v.into_bound_py_any(py),
78 AnyValue::Float32(v) => v.into_bound_py_any(py),
79 AnyValue::Float64(v) => v.into_bound_py_any(py),
80 AnyValue::Null => py.None().into_bound_py_any(py),
81 AnyValue::Boolean(v) => v.into_bound_py_any(py),
82 AnyValue::String(v) => v.into_bound_py_any(py),
83 AnyValue::StringOwned(v) => v.into_bound_py_any(py),
84 AnyValue::Categorical(cat, map) | AnyValue::Enum(cat, map) => unsafe {
85 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
86 },
87 AnyValue::CategoricalOwned(cat, map) | AnyValue::EnumOwned(cat, map) => unsafe {
88 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
89 },
90 AnyValue::Date(v) => {
91 let date = date32_to_date(v);
92 date.into_bound_py_any(py)
93 },
94 AnyValue::Datetime(v, time_unit, time_zone) => {
95 datetime_to_py_object(py, v, time_unit, time_zone)
96 },
97 AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
98 datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
99 },
100 AnyValue::Duration(v, time_unit) => {
101 let time_delta = elapsed_offset_to_timedelta(v, time_unit);
102 time_delta.into_bound_py_any(py)
103 },
104 AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
105 AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
106 ref av @ AnyValue::Struct(_, _, flds) => {
107 Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
108 },
109 AnyValue::StructOwned(payload) => {
110 Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
111 },
112 #[cfg(feature = "object")]
113 AnyValue::Object(v) => {
114 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
115 Ok(object.inner.clone_ref(py).into_bound(py))
116 },
117 #[cfg(feature = "object")]
118 AnyValue::ObjectOwned(v) => {
119 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
120 Ok(object.inner.clone_ref(py).into_bound(py))
121 },
122 AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
123 AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
124 AnyValue::Decimal(v, prec, scale) => {
125 let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
126 let mut buf = DecimalFmtBuffer::new();
127 let s = buf.format_dec128(v, scale, false, false);
128 convert.call1((prec, s))
129 },
130 }
131}
132
133#[derive(Debug)]
137pub struct TypeObjectKey {
138 #[allow(unused)]
139 type_object: Py<PyType>,
140 address: usize,
142}
143
144impl TypeObjectKey {
145 fn new(type_object: Py<PyType>) -> Self {
146 let address = type_object.as_ptr() as usize;
147 Self {
148 type_object,
149 address,
150 }
151 }
152}
153
154impl PartialEq for TypeObjectKey {
155 fn eq(&self, other: &Self) -> bool {
156 self.address == other.address
157 }
158}
159
160impl Eq for TypeObjectKey {}
161
162impl std::borrow::Borrow<usize> for TypeObjectKey {
163 fn borrow(&self) -> &usize {
164 &self.address
165 }
166}
167
168impl std::hash::Hash for TypeObjectKey {
169 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
170 let v: &usize = self.borrow();
171 v.hash(state)
172 }
173}
174
175type InitFn = fn(&Bound<'_, PyAny>, bool) -> PyResult<AnyValue<'static>>;
176pub(crate) static LUT: Mutex<HashMap<TypeObjectKey, InitFn, PlFixedStateQuality>> =
177 Mutex::new(HashMap::with_hasher(PlFixedStateQuality::with_seed(0)));
178
179pub(crate) fn py_object_to_any_value(
181 ob: &Bound<'_, PyAny>,
182 strict: bool,
183 allow_object: bool,
184) -> PyResult<AnyValue<'static>> {
185 fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
187 Ok(AnyValue::Null)
188 }
189
190 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
191 let b = ob.extract::<bool>()?;
192 Ok(AnyValue::Boolean(b))
193 }
194
195 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
196 if let Ok(v) = ob.extract::<i64>() {
197 Ok(AnyValue::Int64(v))
198 } else if let Ok(v) = ob.extract::<i128>() {
199 Ok(AnyValue::Int128(v))
200 } else if let Ok(v) = ob.extract::<u64>() {
201 Ok(AnyValue::UInt64(v))
202 } else if let Ok(v) = ob.extract::<u128>() {
203 Ok(AnyValue::UInt128(v))
204 } else if !strict {
205 let f = ob.extract::<f64>()?;
206 Ok(AnyValue::Float64(f))
207 } else {
208 Err(PyOverflowError::new_err(format!(
209 "int value too large for Polars integer types: {ob}"
210 )))
211 }
212 }
213
214 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
215 Ok(AnyValue::Float64(ob.extract::<f64>()?))
216 }
217
218 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
219 Ok(AnyValue::StringOwned(ob.extract::<String>()?.into()))
231 }
232
233 fn get_bytes(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
234 let value = ob.extract::<Vec<u8>>()?;
235 Ok(AnyValue::BinaryOwned(value))
236 }
237
238 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
239 const UNIX_EPOCH: NaiveDate = DateTime::UNIX_EPOCH.naive_utc().date();
240 let date = ob.extract::<NaiveDate>()?;
241 let elapsed = date.signed_duration_since(UNIX_EPOCH);
242 Ok(AnyValue::Date(elapsed.num_days() as i32))
243 }
244
245 fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
246 let py = ob.py();
247 let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
248
249 if tzinfo.is_none() {
250 let datetime = ob.extract::<NaiveDateTime>()?;
251 let delta = datetime - DateTime::UNIX_EPOCH.naive_utc();
252 let timestamp = delta.num_microseconds().unwrap();
253 return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
254 }
255
256 let (ob, tzinfo) = if let Some(tz) = tzinfo
258 .getattr(intern!(py, "zone"))
259 .ok()
260 .and_then(|tz| (!tz.is_none()).then_some(tz))
261 {
262 let tzinfo = PyTzInfo::timezone(py, tz.downcast_into::<PyString>()?)?;
263 (
264 &ob.call_method(intern!(py, "astimezone"), (&tzinfo,), None)?,
265 tzinfo,
266 )
267 } else {
268 (ob, tzinfo.downcast_into()?)
269 };
270
271 let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
272 let datetime = ob.extract::<DateTime<Tz>>()?;
273 let tz = unsafe { TimeZone::from_static(datetime.timezone().name()) };
274 if datetime.year() >= 2100 {
275 (
278 pl_utils(py)
279 .bind(py)
280 .getattr(intern!(py, "datetime_to_int"))?
281 .call1((ob, intern!(py, "us")))?
282 .extract::<i64>()?,
283 tz,
284 )
285 } else {
286 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
287 (delta.num_microseconds().unwrap(), tz)
288 }
289 } else {
290 let datetime = ob.extract::<DateTime<FixedOffset>>()?;
291 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
292 (delta.num_microseconds().unwrap(), TimeZone::UTC)
293 };
294
295 Ok(AnyValue::DatetimeOwned(
296 timestamp,
297 TimeUnit::Microseconds,
298 Some(Arc::new(tz)),
299 ))
300 }
301
302 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
303 let timedelta = ob.extract::<TimeDelta>()?;
304 if let Some(micros) = timedelta.num_microseconds() {
305 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
306 } else {
307 Ok(AnyValue::Duration(
308 timedelta.num_milliseconds(),
309 TimeUnit::Milliseconds,
310 ))
311 }
312 }
313
314 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
315 let time = ob.extract::<NaiveTime>()?;
316
317 Ok(AnyValue::Time(
318 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
319 ))
320 }
321
322 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
323 fn abs_decimal_from_digits(
324 digits: impl IntoIterator<Item = u8>,
325 exp: i32,
326 ) -> Option<(i128, usize)> {
327 let mut v = 0_i128;
328 for d in digits {
329 v = v.checked_mul(10)?.checked_add(d as i128)?;
330 }
331 let scale = if exp > 0 {
332 v = 10_i128.checked_pow(exp as u32)?.checked_mul(v)?;
333 0
334 } else {
335 (-exp) as usize
336 };
337 dec128_fits(v, DEC128_MAX_PREC).then_some((v, scale))
338 }
339
340 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
342 .call_method0(intern!(ob.py(), "as_tuple"))
343 .unwrap()
344 .extract()
345 .unwrap();
346 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
347 PyErr::from(PyPolarsErr::Other(
348 "Decimal is too large to fit in Decimal128".into(),
349 ))
350 })?;
351 if sign > 0 {
352 v = -v; }
354 Ok(AnyValue::Decimal(v, DEC128_MAX_PREC, scale))
355 }
356
357 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
358 fn get_list_with_constructor(
359 ob: &Bound<'_, PyAny>,
360 strict: bool,
361 ) -> PyResult<AnyValue<'static>> {
362 let py = ob.py();
366 let kwargs = PyDict::new(py);
367 kwargs.set_item("strict", strict)?;
368 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
369 get_list_from_series(s.bind(py), strict)
370 }
371
372 if ob.is_empty()? {
373 Ok(AnyValue::List(Series::new_empty(
374 PlSmallStr::EMPTY,
375 &DataType::Null,
376 )))
377 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
378 let list = ob.downcast::<PySequence>()?;
379
380 let length = list.len()?;
382 let mut iter = list.try_iter()?;
383 let mut avs = Vec::new();
384 for item in &mut iter {
385 let av = py_object_to_any_value(&item?, strict, true)?;
386 let is_null = av.is_null();
387 avs.push(av);
388 if is_null {
389 break;
390 }
391 }
392
393 if let Some(av) = avs.last()
395 && !av.is_null()
396 && av.dtype().is_primitive()
397 {
398 match get_list_with_constructor(ob, true) {
401 Ok(ret) => return Ok(ret),
402 Err(e) => {
403 if strict {
404 return Err(e);
405 }
406 },
407 }
408 }
409
410 avs.reserve(length);
412 for item in &mut iter {
413 avs.push(py_object_to_any_value(&item?, strict, true)?);
414 }
415
416 let (dtype, _n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
417 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
418 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
419 .map_err(|e| {
420 PyTypeError::new_err(format!(
421 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
422 ))
423 })?;
424 Ok(AnyValue::List(s))
425 } else {
426 get_list_with_constructor(ob, strict)
428 }
429 }
430
431 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
432 let s = super::get_series(ob)?;
433 Ok(AnyValue::List(s))
434 }
435
436 fn get_mapping(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
437 let mapping = ob.downcast::<PyMapping>()?;
438 let len = mapping.len()?;
439 let mut keys = Vec::with_capacity(len);
440 let mut vals = Vec::with_capacity(len);
441
442 for item in mapping.items()?.try_iter()? {
443 let item = item?.downcast_into::<PyTuple>()?;
444 let (key_py, val_py) = (item.get_item(0)?, item.get_item(1)?);
445
446 let key: Cow<str> = key_py.extract()?;
447 let val = py_object_to_any_value(&val_py, strict, true)?;
448
449 keys.push(Field::new(key.as_ref().into(), val.dtype()));
450 vals.push(val);
451 }
452 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
453 }
454
455 fn get_struct(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
456 let dict = ob.downcast::<PyDict>().unwrap();
457 let len = dict.len();
458 let mut keys = Vec::with_capacity(len);
459 let mut vals = Vec::with_capacity(len);
460 for (k, v) in dict.into_iter() {
461 let key = k.extract::<Cow<str>>()?;
462 let val = py_object_to_any_value(&v, strict, true)?;
463 let dtype = val.dtype();
464 keys.push(Field::new(key.as_ref().into(), dtype));
465 vals.push(val)
466 }
467 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
468 }
469
470 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
471 #[cfg(feature = "object")]
472 {
473 let v = &ObjectValue {
475 inner: ob.clone().unbind(),
476 };
477 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
478 }
479 #[cfg(not(feature = "object"))]
480 panic!("activate object")
481 }
482
483 fn get_conversion_function(ob: &Bound<'_, PyAny>, allow_object: bool) -> PyResult<InitFn> {
488 let py = ob.py();
489 if ob.is_none() {
490 Ok(get_null)
491 }
492 else if ob.is_instance_of::<PyBool>() {
494 Ok(get_bool)
495 } else if ob.is_instance_of::<PyInt>() {
496 Ok(get_int)
497 } else if ob.is_instance_of::<PyFloat>() {
498 Ok(get_float)
499 } else if ob.is_instance_of::<PyString>() {
500 Ok(get_str)
501 } else if ob.is_instance_of::<PyBytes>() {
502 Ok(get_bytes)
503 } else if ob.is_instance_of::<PyList>() || ob.is_instance_of::<PyTuple>() {
504 Ok(get_list)
505 } else if ob.is_instance_of::<PyDict>() {
506 Ok(get_struct)
507 } else if PyMapping::type_check(ob) {
508 Ok(get_mapping)
509 }
510 else if PyDateTime::type_check(ob) {
513 Ok(get_datetime as InitFn)
514 } else if PyDate::type_check(ob) {
515 Ok(get_date as InitFn)
516 } else if PyTime::type_check(ob) {
517 Ok(get_time as InitFn)
518 } else if PyDelta::type_check(ob) {
519 Ok(get_timedelta as InitFn)
520 } else if ob.is_instance_of::<PyRange>() {
521 Ok(get_list as InitFn)
522 } else {
523 static NDARRAY_TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
524 if let Ok(ndarray_type) = NDARRAY_TYPE.import(py, "numpy", "ndarray") {
525 if ob.is_instance(ndarray_type)? {
526 return Ok(get_list as InitFn);
528 }
529 }
530 static DECIMAL_TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
531 if ob.is_instance(DECIMAL_TYPE.import(py, "decimal", "Decimal")?)? {
532 return Ok(get_decimal as InitFn);
533 }
534
535 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
537 return Ok(get_int as InitFn);
538 } else if ob.extract::<f64>().is_ok() {
539 return Ok(get_float as InitFn);
540 }
541
542 if allow_object {
543 Ok(get_object as InitFn)
544 } else {
545 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
546 }
547 }
548 }
549
550 let py_type = ob.get_type();
551 let py_type_address = py_type.as_ptr() as usize;
552
553 let conversion_func = {
554 if let Some(cached_func) = LUT.lock().unwrap().get(&py_type_address) {
555 *cached_func
556 } else {
557 let k = TypeObjectKey::new(py_type.clone().unbind());
558 assert_eq!(k.address, py_type_address);
559
560 let func = get_conversion_function(ob, allow_object)?;
561 LUT.lock().unwrap().insert(k, func);
562 func
563 }
564 };
565
566 conversion_func(ob, strict)
567}