1use std::borrow::{Borrow, Cow};
2use std::sync::{Arc, Mutex};
3
4use chrono::{
5 DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
6};
7use chrono_tz::Tz;
8use hashbrown::HashMap;
9use num_traits::ToPrimitive;
10#[cfg(feature = "object")]
11use polars::chunked_array::object::PolarsObjectSafe;
12#[cfg(feature = "object")]
13use polars::datatypes::OwnedObject;
14use polars::datatypes::{DataType, Field, TimeUnit};
15use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone};
16use polars_compute::decimal::{DEC128_MAX_PREC, DecimalFmtBuffer, dec128_fits};
17use polars_core::utils::any_values_to_supertype_and_n_dtypes;
18use polars_core::utils::arrow::temporal_conversions::date32_to_date;
19use polars_utils::aliases::PlFixedStateQuality;
20use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
21use pyo3::prelude::*;
22use pyo3::sync::PyOnceLock;
23use pyo3::types::{
24 PyBool, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyFloat, PyInt, PyList, PyMapping,
25 PyRange, PySequence, PyString, PyTime, PyTuple, PyType, PyTzInfo,
26};
27use pyo3::{IntoPyObjectExt, PyTypeCheck, intern};
28
29use super::datetime::{
30 datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
31};
32use super::{ObjectValue, Wrap, struct_dict};
33use crate::error::PyPolarsErr;
34use crate::py_modules::{pl_series, pl_utils};
35use crate::series::PySeries;
36
37impl<'py> IntoPyObject<'py> for Wrap<AnyValue<'_>> {
38 type Target = PyAny;
39 type Output = Bound<'py, Self::Target>;
40 type Error = PyErr;
41
42 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
43 any_value_into_py_object(self.0, py)
44 }
45}
46
47impl<'py> IntoPyObject<'py> for &Wrap<AnyValue<'_>> {
48 type Target = PyAny;
49 type Output = Bound<'py, Self::Target>;
50 type Error = PyErr;
51
52 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
53 self.clone().into_pyobject(py)
54 }
55}
56
57impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<AnyValue<'static>> {
58 type Error = PyErr;
59
60 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
61 py_object_to_any_value(&ob.to_owned(), true, true).map(Wrap)
62 }
63}
64
65pub(crate) fn any_value_into_py_object<'py>(
66 av: AnyValue<'_>,
67 py: Python<'py>,
68) -> PyResult<Bound<'py, PyAny>> {
69 let utils = pl_utils(py).bind(py);
70 match av {
71 AnyValue::UInt8(v) => v.into_bound_py_any(py),
72 AnyValue::UInt16(v) => v.into_bound_py_any(py),
73 AnyValue::UInt32(v) => v.into_bound_py_any(py),
74 AnyValue::UInt64(v) => v.into_bound_py_any(py),
75 AnyValue::UInt128(v) => v.into_bound_py_any(py),
76 AnyValue::Int8(v) => v.into_bound_py_any(py),
77 AnyValue::Int16(v) => v.into_bound_py_any(py),
78 AnyValue::Int32(v) => v.into_bound_py_any(py),
79 AnyValue::Int64(v) => v.into_bound_py_any(py),
80 AnyValue::Int128(v) => v.into_bound_py_any(py),
81 AnyValue::Float16(v) => v.to_f32().into_bound_py_any(py),
82 AnyValue::Float32(v) => v.into_bound_py_any(py),
83 AnyValue::Float64(v) => v.into_bound_py_any(py),
84 AnyValue::Null => py.None().into_bound_py_any(py),
85 AnyValue::Boolean(v) => v.into_bound_py_any(py),
86 AnyValue::String(v) => v.into_bound_py_any(py),
87 AnyValue::StringOwned(v) => v.into_bound_py_any(py),
88 AnyValue::Categorical(cat, map) | AnyValue::Enum(cat, map) => unsafe {
89 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
90 },
91 AnyValue::CategoricalOwned(cat, map) | AnyValue::EnumOwned(cat, map) => unsafe {
92 map.cat_to_str_unchecked(cat).into_bound_py_any(py)
93 },
94 AnyValue::Date(v) => {
95 let date = date32_to_date(v);
96 date.into_bound_py_any(py)
97 },
98 AnyValue::Datetime(v, time_unit, time_zone) => {
99 datetime_to_py_object(py, v, time_unit, time_zone)
100 },
101 AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
102 datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
103 },
104 AnyValue::Duration(v, time_unit) => {
105 let time_delta = elapsed_offset_to_timedelta(v, time_unit);
106 time_delta.into_bound_py_any(py)
107 },
108 AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_bound_py_any(py),
109 AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(py),
110 ref av @ AnyValue::Struct(_, _, flds) => {
111 Ok(struct_dict(py, av._iter_struct_av(), flds)?.into_any())
112 },
113 AnyValue::StructOwned(payload) => {
114 Ok(struct_dict(py, payload.0.into_iter(), &payload.1)?.into_any())
115 },
116 #[cfg(feature = "object")]
117 AnyValue::Object(v) => {
118 let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
119 Ok(object.inner.clone_ref(py).into_bound(py))
120 },
121 #[cfg(feature = "object")]
122 AnyValue::ObjectOwned(v) => {
123 let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
124 Ok(object.inner.clone_ref(py).into_bound(py))
125 },
126 AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py),
127 AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py),
128 AnyValue::Decimal(v, prec, scale) => {
129 let convert = utils.getattr(intern!(py, "to_py_decimal"))?;
130 let mut buf = DecimalFmtBuffer::new();
131 let s = buf.format_dec128(v, scale, false, false);
132 convert.call1((prec, s))
133 },
134 }
135}
136
/// Hash-map key that identifies a Python type object by its pointer address.
///
/// Equality, hashing and `Borrow<usize>` are all implemented in terms of `address`, so a map
/// keyed by this type can be probed with a plain `usize`.
#[derive(Debug)]
pub struct TypeObjectKey {
    // Owned handle to the type object; it is never read, hence the `allow`.
    // NOTE(review): presumably held so the type object stays alive and its address cannot be
    // reused while the key exists — confirm.
    #[allow(unused)]
    type_object: Py<PyType>,
    // Pointer address of `type_object`; kept as a field so `Borrow<usize>` can return a
    // reference to it.
    address: usize,
}
147
148impl TypeObjectKey {
149 fn new(type_object: Py<PyType>) -> Self {
150 let address = type_object.as_ptr() as usize;
151 Self {
152 type_object,
153 address,
154 }
155 }
156}
157
158impl PartialEq for TypeObjectKey {
159 fn eq(&self, other: &Self) -> bool {
160 self.address == other.address
161 }
162}
163
// Address comparison on `usize` is a total equivalence relation, so `Eq` holds.
impl Eq for TypeObjectKey {}
165
166impl std::borrow::Borrow<usize> for TypeObjectKey {
167 fn borrow(&self) -> &usize {
168 &self.address
169 }
170}
171
172impl std::hash::Hash for TypeObjectKey {
173 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
174 let v: &usize = self.borrow();
175 v.hash(state)
176 }
177}
178
/// Signature shared by the per-type converters: takes the Python object and the `strict` flag
/// and returns an owned `AnyValue`.
type InitFn = fn(&Bound<'_, PyAny>, bool) -> PyResult<AnyValue<'static>>;
/// Cache mapping a Python type object (keyed by its address via `TypeObjectKey`) to the
/// converter selected for that type.
///
/// Guarded by a `Mutex` and built with a fixed-seed hasher (`with_seed(0)`).
pub(crate) static LUT: Mutex<HashMap<TypeObjectKey, InitFn, PlFixedStateQuality>> =
    Mutex::new(HashMap::with_hasher(PlFixedStateQuality::with_seed(0)));
182
183pub(crate) fn py_object_to_any_value(
185 ob: &Bound<'_, PyAny>,
186 strict: bool,
187 allow_object: bool,
188) -> PyResult<AnyValue<'static>> {
189 fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
191 Ok(AnyValue::Null)
192 }
193
194 fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
195 let b = ob.extract::<bool>()?;
196 Ok(AnyValue::Boolean(b))
197 }
198
199 fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
200 if let Ok(v) = ob.extract::<i64>() {
201 Ok(AnyValue::Int64(v))
202 } else if let Ok(v) = ob.extract::<i128>() {
203 Ok(AnyValue::Int128(v))
204 } else if let Ok(v) = ob.extract::<u64>() {
205 Ok(AnyValue::UInt64(v))
206 } else if let Ok(v) = ob.extract::<u128>() {
207 Ok(AnyValue::UInt128(v))
208 } else if !strict {
209 let f = ob.extract::<f64>()?;
210 Ok(AnyValue::Float64(f))
211 } else {
212 Err(PyOverflowError::new_err(format!(
213 "int value too large for Polars integer types: {ob}"
214 )))
215 }
216 }
217
218 fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
219 Ok(AnyValue::Float64(ob.extract::<f64>()?))
220 }
221
222 fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
223 Ok(AnyValue::StringOwned(PlSmallStr::from(
224 ob.extract::<&str>()?,
225 )))
226 }
227
228 fn get_bytes(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
229 let value = ob.extract::<Vec<u8>>()?;
230 Ok(AnyValue::BinaryOwned(value))
231 }
232
233 fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
234 const UNIX_EPOCH: NaiveDate = DateTime::UNIX_EPOCH.naive_utc().date();
235 let date = ob.extract::<NaiveDate>()?;
236 let elapsed = date.signed_duration_since(UNIX_EPOCH);
237 Ok(AnyValue::Date(elapsed.num_days() as i32))
238 }
239
240 fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
241 let py = ob.py();
242 let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
243
244 if tzinfo.is_none() {
245 let datetime = ob.extract::<NaiveDateTime>()?;
246 let delta = datetime - DateTime::UNIX_EPOCH.naive_utc();
247 let timestamp = delta.num_microseconds().unwrap();
248 return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
249 }
250
251 let (ob, tzinfo) = if let Some(tz) = tzinfo
253 .getattr(intern!(py, "zone"))
254 .ok()
255 .and_then(|tz| (!tz.is_none()).then_some(tz))
256 {
257 let tzinfo = PyTzInfo::timezone(py, tz.cast_into::<PyString>()?)?;
258 (
259 &ob.call_method(intern!(py, "astimezone"), (&tzinfo,), None)?,
260 tzinfo,
261 )
262 } else {
263 (ob, tzinfo.cast_into()?)
264 };
265
266 let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
267 let datetime = ob.extract::<DateTime<Tz>>()?;
268 let tz = unsafe { TimeZone::from_static(datetime.timezone().name()) };
269 if datetime.year() >= 2100 {
270 (
273 pl_utils(py)
274 .bind(py)
275 .getattr(intern!(py, "datetime_to_int"))?
276 .call1((ob, intern!(py, "us")))?
277 .extract::<i64>()?,
278 tz,
279 )
280 } else {
281 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
282 (delta.num_microseconds().unwrap(), tz)
283 }
284 } else {
285 let datetime = ob.extract::<DateTime<FixedOffset>>()?;
286 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
287 (delta.num_microseconds().unwrap(), TimeZone::UTC)
288 };
289
290 Ok(AnyValue::DatetimeOwned(
291 timestamp,
292 TimeUnit::Microseconds,
293 Some(Arc::new(tz)),
294 ))
295 }
296
297 fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
298 let timedelta = ob.extract::<TimeDelta>()?;
299 if let Some(micros) = timedelta.num_microseconds() {
300 Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
301 } else {
302 Ok(AnyValue::Duration(
303 timedelta.num_milliseconds(),
304 TimeUnit::Milliseconds,
305 ))
306 }
307 }
308
309 fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
310 let time = ob.extract::<NaiveTime>()?;
311
312 Ok(AnyValue::Time(
313 (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
314 ))
315 }
316
317 fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
318 fn abs_decimal_from_digits(
319 digits: impl IntoIterator<Item = u8>,
320 exp: i32,
321 ) -> Option<(i128, usize)> {
322 let mut v = 0_i128;
323 for d in digits {
324 v = v.checked_mul(10)?.checked_add(d as i128)?;
325 }
326 let scale = if exp > 0 {
327 v = 10_i128.checked_pow(exp as u32)?.checked_mul(v)?;
328 0
329 } else {
330 (-exp) as usize
331 };
332 dec128_fits(v, DEC128_MAX_PREC).then_some((v, scale))
333 }
334
335 let (sign, digits, exp): (i8, Vec<u8>, i32) = ob
337 .call_method0(intern!(ob.py(), "as_tuple"))
338 .unwrap()
339 .extract()
340 .unwrap();
341 let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
342 PyErr::from(PyPolarsErr::Other(
343 "Decimal is too large to fit in Decimal128".into(),
344 ))
345 })?;
346 if sign > 0 {
347 v = -v; }
349 Ok(AnyValue::Decimal(v, DEC128_MAX_PREC, scale))
350 }
351
352 fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
353 fn get_list_with_constructor(
354 ob: &Bound<'_, PyAny>,
355 strict: bool,
356 ) -> PyResult<AnyValue<'static>> {
357 let py = ob.py();
361 let kwargs = PyDict::new(py);
362 kwargs.set_item("strict", strict)?;
363 let s = pl_series(py).call(py, (ob,), Some(&kwargs))?;
364 get_list_from_series(s.bind(py), strict)
365 }
366
367 if ob.is_empty()? {
368 Ok(AnyValue::List(Series::new_empty(
369 PlSmallStr::EMPTY,
370 &DataType::Null,
371 )))
372 } else if ob.is_instance_of::<PyList>() | ob.is_instance_of::<PyTuple>() {
373 let list = ob.cast::<PySequence>()?;
374
375 let length = list.len()?;
377 let mut iter = list.try_iter()?;
378 let mut avs = Vec::new();
379 for item in &mut iter {
380 let av = py_object_to_any_value(&item?, strict, true)?;
381 let is_null = av.is_null();
382 avs.push(av);
383 if is_null {
384 break;
385 }
386 }
387
388 if let Some(av) = avs.last()
390 && !av.is_null()
391 && av.dtype().is_primitive()
392 {
393 match get_list_with_constructor(ob, true) {
396 Ok(ret) => return Ok(ret),
397 Err(e) => {
398 if strict {
399 return Err(e);
400 }
401 },
402 }
403 }
404
405 avs.reserve(length);
407 for item in &mut iter {
408 avs.push(py_object_to_any_value(&item?, strict, true)?);
409 }
410
411 let (dtype, _n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
412 .map_err(|e| PyTypeError::new_err(e.to_string()))?;
413 let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, strict)
414 .map_err(|e| {
415 PyTypeError::new_err(format!(
416 "{e}\n\nHint: Try setting `strict=False` to allow passing data with mixed types."
417 ))
418 })?;
419 Ok(AnyValue::List(s))
420 } else {
421 get_list_with_constructor(ob, strict)
423 }
424 }
425
426 fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
427 let s = super::get_series(ob)?;
428 Ok(AnyValue::List(s))
429 }
430
431 fn get_mapping(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
432 let mapping = ob.cast::<PyMapping>()?;
433 let len = mapping.len()?;
434 let mut keys = Vec::with_capacity(len);
435 let mut vals = Vec::with_capacity(len);
436
437 for item in mapping.items()?.try_iter()? {
438 let item = item?.cast_into::<PyTuple>()?;
439 let (key_py, val_py) = (item.get_item(0)?, item.get_item(1)?);
440
441 let key: Cow<str> = key_py.extract()?;
442 let val = py_object_to_any_value(&val_py, strict, true)?;
443
444 keys.push(Field::new(key.as_ref().into(), val.dtype()));
445 vals.push(val);
446 }
447 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
448 }
449
450 fn get_struct(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
451 let dict = ob.cast::<PyDict>().unwrap();
452 let len = dict.len();
453 let mut keys = Vec::with_capacity(len);
454 let mut vals = Vec::with_capacity(len);
455 for (k, v) in dict.into_iter() {
456 let key = k.extract::<Cow<str>>()?;
457 let val = py_object_to_any_value(&v, strict, true)?;
458 let dtype = val.dtype();
459 keys.push(Field::new(key.as_ref().into(), dtype));
460 vals.push(val)
461 }
462 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
463 }
464
465 fn get_namedtuple(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
466 let tuple = ob.cast::<PyTuple>().unwrap();
467 let len = tuple.len();
468 let fields = ob
469 .getattr(intern!(ob.py(), "_fields"))?
470 .cast_into::<PyTuple>()?;
471 let mut keys = Vec::with_capacity(len);
472 let mut vals = Vec::with_capacity(len);
473 for (k, v) in fields.into_iter().zip(tuple.into_iter()) {
474 let key = k.extract::<Cow<str>>()?;
475 let val = py_object_to_any_value(&v, strict, true)?;
476 let dtype = val.dtype();
477 keys.push(Field::new(key.as_ref().into(), dtype));
478 vals.push(val)
479 }
480 Ok(AnyValue::StructOwned(Box::new((vals, keys))))
481 }
482
483 fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
484 #[cfg(feature = "object")]
485 {
486 let v = &ObjectValue {
488 inner: ob.clone().unbind(),
489 };
490 Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
491 }
492 #[cfg(not(feature = "object"))]
493 panic!("activate object")
494 }
495
496 fn get_conversion_function(ob: &Bound<'_, PyAny>, allow_object: bool) -> PyResult<InitFn> {
501 let py = ob.py();
502 if ob.is_none() {
503 Ok(get_null)
504 }
505 else if ob.is_instance_of::<PyBool>() {
507 Ok(get_bool)
508 } else if ob.is_instance_of::<PyInt>() {
509 Ok(get_int)
510 } else if ob.is_instance_of::<PyFloat>() {
511 Ok(get_float)
512 } else if ob.is_instance_of::<PyString>() {
513 Ok(get_str)
514 } else if ob.is_instance_of::<PyBytes>() {
515 Ok(get_bytes)
516 } else if ob.is_instance_of::<PyTuple>() {
517 if ob.hasattr(intern!(py, "_fields"))? {
519 Ok(get_namedtuple)
520 } else {
521 Ok(get_list)
522 }
523 } else if ob.is_instance_of::<PyList>() {
524 Ok(get_list)
525 } else if ob.is_instance_of::<PyDict>() {
526 Ok(get_struct)
527 } else if PyMapping::type_check(ob) {
528 Ok(get_mapping)
529 }
530 else if PyDateTime::type_check(ob) {
533 Ok(get_datetime as InitFn)
534 } else if PyDate::type_check(ob) {
535 Ok(get_date as InitFn)
536 } else if PyTime::type_check(ob) {
537 Ok(get_time as InitFn)
538 } else if PyDelta::type_check(ob) {
539 Ok(get_timedelta as InitFn)
540 } else if ob.is_instance_of::<PyRange>() {
541 Ok(get_list as InitFn)
542 } else if ob.is_instance(pl_series(py).bind(py))? {
543 Ok(get_list_from_series as InitFn)
544 } else {
545 static NDARRAY_TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
546 if let Ok(ndarray_type) = NDARRAY_TYPE.import(py, "numpy", "ndarray") {
547 if ob.is_instance(ndarray_type)? {
548 return Ok(get_list as InitFn);
550 }
551 }
552 static DECIMAL_TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
553 if ob.is_instance(DECIMAL_TYPE.import(py, "decimal", "Decimal")?)? {
554 return Ok(get_decimal as InitFn);
555 }
556
557 if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
559 return Ok(get_int as InitFn);
560 } else if ob.extract::<f64>().is_ok() {
561 return Ok(get_float as InitFn);
562 }
563
564 if allow_object {
565 Ok(get_object as InitFn)
566 } else {
567 Err(PyValueError::new_err(format!("Cannot convert {ob}")))
568 }
569 }
570 }
571
572 let py_type = ob.get_type();
573 let py_type_address = py_type.as_ptr() as usize;
574
575 let conversion_func = {
576 if let Some(cached_func) = LUT.lock().unwrap().get(&py_type_address) {
577 *cached_func
578 } else {
579 let k = TypeObjectKey::new(py_type.clone().unbind());
580 assert_eq!(k.address, py_type_address);
581
582 let func = get_conversion_function(ob, allow_object)?;
583 LUT.lock().unwrap().insert(k, func);
584 func
585 }
586 };
587
588 conversion_func(ob, strict)
589}