polars_python/conversion/
mod.rs

1pub(crate) mod any_value;
2pub(crate) mod chunked_array;
3mod datetime;
4
5use std::convert::Infallible;
6use std::fmt::{Display, Formatter};
7use std::fs::File;
8use std::hash::{Hash, Hasher};
9use std::path::PathBuf;
10
11#[cfg(feature = "object")]
12use polars::chunked_array::object::PolarsObjectSafe;
13use polars::frame::row::Row;
14#[cfg(feature = "avro")]
15use polars::io::avro::AvroCompression;
16#[cfg(feature = "cloud")]
17use polars::io::cloud::CloudOptions;
18use polars::prelude::deletion::DeletionFilesList;
19use polars::series::ops::NullBehavior;
20use polars_core::utils::arrow::array::Array;
21use polars_core::utils::arrow::types::NativeType;
22use polars_core::utils::materialize_dyn_int;
23use polars_lazy::prelude::*;
24#[cfg(feature = "parquet")]
25use polars_parquet::write::StatisticsOptions;
26use polars_plan::dsl::ScanSources;
27use polars_utils::mmap::MemSlice;
28use polars_utils::pl_str::PlSmallStr;
29use polars_utils::total_ord::{TotalEq, TotalHash};
30use pyo3::basic::CompareOp;
31use pyo3::exceptions::{PyTypeError, PyValueError};
32use pyo3::intern;
33use pyo3::prelude::*;
34use pyo3::pybacked::PyBackedStr;
35use pyo3::sync::GILOnceCell;
36use pyo3::types::{PyDict, PyList, PySequence, PyString};
37
38use crate::error::PyPolarsErr;
39use crate::expr::PyExpr;
40use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
41#[cfg(feature = "object")]
42use crate::object::OBJECT_NAME;
43use crate::prelude::*;
44use crate::py_modules::{pl_series, polars};
45use crate::series::PySeries;
46use crate::utils::to_py_err;
47use crate::{PyDataFrame, PyLazyFrame};
48
/// Marker for wrapper types whose buffers may be reinterpreted as buffers of `Target`.
///
/// [`reinterpret_vec`] relies on this marker to cast a `Vec<Self>` into a
/// `Vec<Self::Target>`; it only asserts equal size and alignment at runtime.
///
/// # Safety
/// Should only be implemented for transparent types
pub(crate) unsafe trait Transparent {
    /// The type that `Self` is reinterpreted as.
    type Target;
}
54
// NOTE(review): presumably `PySeries` is a `#[repr(transparent)]` wrapper around
// `Series`; its definition is not in this file — confirm before relying on it.
unsafe impl Transparent for PySeries {
    type Target = Series;
}
58
// `Wrap<T>` is declared `#[repr(transparent)]` in this file, so it shares `T`'s layout.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
62
// Lifts the marker through `Option`.
// NOTE(review): this assumes `Option<T>` and `Option<T::Target>` have the same layout;
// `reinterpret_vec` only asserts equal size and alignment at runtime.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
66
67pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
68    assert_eq!(size_of::<T>(), size_of::<T::Target>());
69    assert_eq!(align_of::<T>(), align_of::<T::Target>());
70    let len = input.len();
71    let cap = input.capacity();
72    let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
73    let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
74    let ptr: *mut T::Target = vec_ptr as *mut T::Target;
75    unsafe { Vec::from_raw_parts(ptr, len, cap) }
76}
77
78pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
79    reinterpret_vec(buf)
80}
81
/// Transparent newtype around `T`.
///
/// The conversion trait impls in this file (`FromPyObject`, `IntoPyObject`) are written
/// against `Wrap<...>` rather than against the wrapped type directly.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
85
86impl<T> Clone for Wrap<T>
87where
88    T: Clone,
89{
90    fn clone(&self) -> Self {
91        Wrap(self.0.clone())
92    }
93}
94impl<T> From<T> for Wrap<T> {
95    fn from(t: T) -> Self {
96        Wrap(t)
97    }
98}
99
100// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
101pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
102    let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
103    Ok(pydf.extract::<PyDataFrame>()?.df)
104}
105
106pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
107    let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
108    Ok(pydf.extract::<PyLazyFrame>()?.ldf)
109}
110
111pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
112    let s = obj.getattr(intern!(obj.py(), "_s"))?;
113    Ok(s.extract::<PySeries>()?.series)
114}
115
116pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<PyAny>> {
117    let series = pl_series(py).bind(py);
118    let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
119    constructor.call1((s,))
120}
121
122impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
123    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
124        Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
125    }
126}
127
/// Extracts the `null_values` argument.
///
/// Accepted shapes, tried in this order:
/// 1. a single string -> `NullValues::AllColumnsSingle`
/// 2. a sequence of strings -> `NullValues::AllColumns`
/// 3. a sequence of `(str, str)` pairs -> `NullValues::Named`
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(values) = ob.extract::<Vec<PyBackedStr>>() {
            let values = values.into_iter().map(|value| (&*value).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let pairs = pairs
                .into_iter()
                .map(|(first, second)| ((&*first).into(), (&*second).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(pairs)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
151
152fn struct_dict<'a, 'py>(
153    py: Python<'py>,
154    vals: impl Iterator<Item = AnyValue<'a>>,
155    flds: &[Field],
156) -> PyResult<Bound<'py, PyDict>> {
157    let dict = PyDict::new(py);
158    flds.iter().zip(vals).try_for_each(|(fld, val)| {
159        dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
160    })?;
161    Ok(dict)
162}
163
/// Formats `v` as decimal text into `buf` and converts the ASCII characters to digit
/// values in place, returning the number of characters written.
///
/// The text is produced with `itoa` and copied byte by byte into the start of `buf`
/// (viewed as 48 bytes). Afterwards `0x30` (ASCII `'0'`) is subtracted from every byte
/// lane via three 128-bit integer subtractions over the whole buffer.
///
/// NOTE(review): the subtraction is a plain integer subtraction, so a byte below `0x30`
/// (e.g. a leading `'-'`, or bytes past the returned length) borrows from the adjacent
/// byte. Presumably callers pass a non-negative value and only read the first `len`
/// bytes — confirm at the call sites.
// accept u128 array to ensure alignment is correct
fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
    // ASCII '0' (0x30) replicated into each of the 16 bytes of an i128.
    const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
    // SAFETY: transmute is safe as there are 48 bytes in 3 128bit ints
    // and the alignment requirement of u8 (1 byte) is met by any u128-aligned pointer
    let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
    let mut buffer = itoa::Buffer::new();
    let value = buffer.format(v);
    let len = value.len();
    // Copy the formatted characters to the front of the byte buffer; `zip` stops at the
    // shorter side, so at most 48 bytes are written.
    for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
        *dst = *src
    }

    // View the same 48 bytes as three i128 words again; the pointer originates from a
    // `[u128; 3]`, so it is suitably aligned for i128.
    let ptr = buf.as_mut_ptr() as *mut i128;
    unsafe {
        // this is safe because we know that the buffer is exactly 48 bytes long
        *ptr -= ZEROS;
        *ptr.add(1) -= ZEROS;
        *ptr.add(2) -= ZEROS;
    }
    len
}
186
187impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
188    type Target = PyAny;
189    type Output = Bound<'py, Self::Target>;
190    type Error = PyErr;
191
192    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
193        let pl = polars(py).bind(py);
194
195        match &self.0 {
196            DataType::Int8 => {
197                let class = pl.getattr(intern!(py, "Int8"))?;
198                class.call0()
199            },
200            DataType::Int16 => {
201                let class = pl.getattr(intern!(py, "Int16"))?;
202                class.call0()
203            },
204            DataType::Int32 => {
205                let class = pl.getattr(intern!(py, "Int32"))?;
206                class.call0()
207            },
208            DataType::Int64 => {
209                let class = pl.getattr(intern!(py, "Int64"))?;
210                class.call0()
211            },
212            DataType::UInt8 => {
213                let class = pl.getattr(intern!(py, "UInt8"))?;
214                class.call0()
215            },
216            DataType::UInt16 => {
217                let class = pl.getattr(intern!(py, "UInt16"))?;
218                class.call0()
219            },
220            DataType::UInt32 => {
221                let class = pl.getattr(intern!(py, "UInt32"))?;
222                class.call0()
223            },
224            DataType::UInt64 => {
225                let class = pl.getattr(intern!(py, "UInt64"))?;
226                class.call0()
227            },
228            DataType::Int128 => {
229                let class = pl.getattr(intern!(py, "Int128"))?;
230                class.call0()
231            },
232            DataType::Float32 => {
233                let class = pl.getattr(intern!(py, "Float32"))?;
234                class.call0()
235            },
236            DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
237                let class = pl.getattr(intern!(py, "Float64"))?;
238                class.call0()
239            },
240            DataType::Decimal(precision, scale) => {
241                let class = pl.getattr(intern!(py, "Decimal"))?;
242                let args = (*precision, *scale);
243                class.call1(args)
244            },
245            DataType::Boolean => {
246                let class = pl.getattr(intern!(py, "Boolean"))?;
247                class.call0()
248            },
249            DataType::String | DataType::Unknown(UnknownKind::Str) => {
250                let class = pl.getattr(intern!(py, "String"))?;
251                class.call0()
252            },
253            DataType::Binary => {
254                let class = pl.getattr(intern!(py, "Binary"))?;
255                class.call0()
256            },
257            DataType::Array(inner, size) => {
258                let class = pl.getattr(intern!(py, "Array"))?;
259                let inner = Wrap(*inner.clone());
260                let args = (&inner, *size);
261                class.call1(args)
262            },
263            DataType::List(inner) => {
264                let class = pl.getattr(intern!(py, "List"))?;
265                let inner = Wrap(*inner.clone());
266                class.call1((&inner,))
267            },
268            DataType::Date => {
269                let class = pl.getattr(intern!(py, "Date"))?;
270                class.call0()
271            },
272            DataType::Datetime(tu, tz) => {
273                let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
274                datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
275            },
276            DataType::Duration(tu) => {
277                let duration_class = pl.getattr(intern!(py, "Duration"))?;
278                duration_class.call1((tu.to_ascii(),))
279            },
280            #[cfg(feature = "object")]
281            DataType::Object(_) => {
282                let class = pl.getattr(intern!(py, "Object"))?;
283                class.call0()
284            },
285            DataType::Categorical(_, ordering) => {
286                let class = pl.getattr(intern!(py, "Categorical"))?;
287                class.call1((Wrap(*ordering),))
288            },
289            DataType::Enum(rev_map, _) => {
290                // we should always have an initialized rev_map coming from rust
291                let categories = rev_map.as_ref().unwrap().get_categories();
292                let class = pl.getattr(intern!(py, "Enum"))?;
293                let s =
294                    Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
295                        .map_err(PyPolarsErr::from)?;
296                let series = to_series(py, s.into())?;
297                class.call1((series,))
298            },
299            DataType::Time => pl.getattr(intern!(py, "Time")),
300            DataType::Struct(fields) => {
301                let field_class = pl.getattr(intern!(py, "Field"))?;
302                let iter = fields.iter().map(|fld| {
303                    let name = fld.name().as_str();
304                    let dtype = Wrap(fld.dtype().clone());
305                    field_class.call1((name, &dtype)).unwrap()
306                });
307                let fields = PyList::new(py, iter)?;
308                let struct_class = pl.getattr(intern!(py, "Struct"))?;
309                struct_class.call1((fields,))
310            },
311            DataType::Null => {
312                let class = pl.getattr(intern!(py, "Null"))?;
313                class.call0()
314            },
315            DataType::Unknown(UnknownKind::Int(v)) => {
316                Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
317            },
318            DataType::Unknown(_) => {
319                let class = pl.getattr(intern!(py, "Unknown"))?;
320                class.call0()
321            },
322            DataType::BinaryOffset => {
323                unimplemented!()
324            },
325        }
326    }
327}
328
329impl<'py> FromPyObject<'py> for Wrap<Field> {
330    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
331        let py = ob.py();
332        let name = ob
333            .getattr(intern!(py, "name"))?
334            .str()?
335            .extract::<PyBackedStr>()?;
336        let dtype = ob
337            .getattr(intern!(py, "dtype"))?
338            .extract::<Wrap<DataType>>()?;
339        Ok(Wrap(Field::new((&*name).into(), dtype.0)))
340    }
341}
342
343impl<'py> FromPyObject<'py> for Wrap<DataType> {
344    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
345        let py = ob.py();
346        let type_name = ob.get_type().qualname()?.to_string();
347
348        let dtype = match &*type_name {
349            "DataTypeClass" => {
350                // just the class, not an object
351                let name = ob
352                    .getattr(intern!(py, "__name__"))?
353                    .str()?
354                    .extract::<PyBackedStr>()?;
355                match &*name {
356                    "Int8" => DataType::Int8,
357                    "Int16" => DataType::Int16,
358                    "Int32" => DataType::Int32,
359                    "Int64" => DataType::Int64,
360                    "Int128" => DataType::Int128,
361                    "UInt8" => DataType::UInt8,
362                    "UInt16" => DataType::UInt16,
363                    "UInt32" => DataType::UInt32,
364                    "UInt64" => DataType::UInt64,
365                    "Float32" => DataType::Float32,
366                    "Float64" => DataType::Float64,
367                    "Boolean" => DataType::Boolean,
368                    "String" => DataType::String,
369                    "Binary" => DataType::Binary,
370                    "Categorical" => DataType::Categorical(None, Default::default()),
371                    "Enum" => DataType::Enum(None, Default::default()),
372                    "Date" => DataType::Date,
373                    "Time" => DataType::Time,
374                    "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
375                    "Duration" => DataType::Duration(TimeUnit::Microseconds),
376                    "Decimal" => DataType::Decimal(None, None), // "none" scale => "infer"
377                    "List" => DataType::List(Box::new(DataType::Null)),
378                    "Array" => DataType::Array(Box::new(DataType::Null), 0),
379                    "Struct" => DataType::Struct(vec![]),
380                    "Null" => DataType::Null,
381                    #[cfg(feature = "object")]
382                    "Object" => DataType::Object(OBJECT_NAME),
383                    "Unknown" => DataType::Unknown(Default::default()),
384                    dt => {
385                        return Err(PyTypeError::new_err(format!(
386                            "'{dt}' is not a Polars data type",
387                        )));
388                    },
389                }
390            },
391            "Int8" => DataType::Int8,
392            "Int16" => DataType::Int16,
393            "Int32" => DataType::Int32,
394            "Int64" => DataType::Int64,
395            "Int128" => DataType::Int128,
396            "UInt8" => DataType::UInt8,
397            "UInt16" => DataType::UInt16,
398            "UInt32" => DataType::UInt32,
399            "UInt64" => DataType::UInt64,
400            "Float32" => DataType::Float32,
401            "Float64" => DataType::Float64,
402            "Boolean" => DataType::Boolean,
403            "String" => DataType::String,
404            "Binary" => DataType::Binary,
405            "Categorical" => {
406                let ordering = ob.getattr(intern!(py, "ordering")).unwrap();
407                let ordering = ordering.extract::<Wrap<CategoricalOrdering>>()?.0;
408                DataType::Categorical(None, ordering)
409            },
410            "Enum" => {
411                let categories = ob.getattr(intern!(py, "categories")).unwrap();
412                let s = get_series(&categories.as_borrowed())?;
413                let ca = s.str().map_err(PyPolarsErr::from)?;
414                let categories = ca.downcast_iter().next().unwrap().clone();
415                create_enum_dtype(categories)
416            },
417            "Date" => DataType::Date,
418            "Time" => DataType::Time,
419            "Datetime" => {
420                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
421                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
422                let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
423                let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
424                DataType::Datetime(
425                    time_unit,
426                    TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
427                )
428            },
429            "Duration" => {
430                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
431                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
432                DataType::Duration(time_unit)
433            },
434            "Decimal" => {
435                let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
436                let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
437                DataType::Decimal(precision, Some(scale))
438            },
439            "List" => {
440                let inner = ob.getattr(intern!(py, "inner")).unwrap();
441                let inner = inner.extract::<Wrap<DataType>>()?;
442                DataType::List(Box::new(inner.0))
443            },
444            "Array" => {
445                let inner = ob.getattr(intern!(py, "inner")).unwrap();
446                let size = ob.getattr(intern!(py, "size")).unwrap();
447                let inner = inner.extract::<Wrap<DataType>>()?;
448                let size = size.extract::<usize>()?;
449                DataType::Array(Box::new(inner.0), size)
450            },
451            "Struct" => {
452                let fields = ob.getattr(intern!(py, "fields"))?;
453                let fields = fields
454                    .extract::<Vec<Wrap<Field>>>()?
455                    .into_iter()
456                    .map(|f| f.0)
457                    .collect::<Vec<Field>>();
458                DataType::Struct(fields)
459            },
460            "Null" => DataType::Null,
461            #[cfg(feature = "object")]
462            "Object" => DataType::Object(OBJECT_NAME),
463            "Unknown" => DataType::Unknown(Default::default()),
464            dt => {
465                return Err(PyTypeError::new_err(format!(
466                    "'{dt}' is not a Polars data type",
467                )));
468            },
469        };
470        Ok(Wrap(dtype))
471    }
472}
473
474impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
475    type Target = PyString;
476    type Output = Bound<'py, Self::Target>;
477    type Error = Infallible;
478
479    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
480        match self.0 {
481            CategoricalOrdering::Physical => "physical",
482            CategoricalOrdering::Lexical => "lexical",
483        }
484        .into_pyobject(py)
485    }
486}
487
488impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
489    type Target = PyString;
490    type Output = Bound<'py, Self::Target>;
491    type Error = Infallible;
492
493    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
494        self.0.to_ascii().into_pyobject(py)
495    }
496}
497
/// Extracts parquet statistics options from a Python dict of `str -> bool`.
///
/// Recognised keys: `"min"`, `"max"`, `"distinct_count"`, `"null_count"`. Options not
/// mentioned keep the value from `StatisticsOptions::empty()`; an unknown key is a
/// `TypeError`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in ob.downcast::<PyDict>()?.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag that the key refers to, then assign it once.
            let flag = match key.as_ref() {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
524
525impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
526    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
527        let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
528        let vals = reinterpret_vec(vals);
529        Ok(Wrap(Row(vals)))
530    }
531}
532
533impl<'py> FromPyObject<'py> for Wrap<Schema> {
534    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
535        let dict = ob.downcast::<PyDict>()?;
536
537        Ok(Wrap(
538            dict.iter()
539                .map(|(key, val)| {
540                    let key = key.extract::<PyBackedStr>()?;
541                    let val = val.extract::<Wrap<DataType>>()?;
542
543                    Ok(Field::new((&*key).into(), val.0))
544                })
545                .collect::<PyResult<Schema>>()?,
546        ))
547    }
548}
549
550impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
551    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
552        let list = ob.downcast::<PyList>()?.to_owned();
553
554        if list.is_empty() {
555            return Ok(Wrap(ScanSources::default()));
556        }
557
558        enum MutableSources {
559            Paths(Vec<PathBuf>),
560            Files(Vec<File>),
561            Buffers(Vec<MemSlice>),
562        }
563
564        let num_items = list.len();
565        let mut iter = list
566            .into_iter()
567            .map(|val| get_python_scan_source_input(val.unbind(), false));
568
569        let Some(first) = iter.next() else {
570            return Ok(Wrap(ScanSources::default()));
571        };
572
573        let mut sources = match first? {
574            PythonScanSourceInput::Path(path) => {
575                let mut sources = Vec::with_capacity(num_items);
576                sources.push(path);
577                MutableSources::Paths(sources)
578            },
579            PythonScanSourceInput::File(file) => {
580                let mut sources = Vec::with_capacity(num_items);
581                sources.push(file.into());
582                MutableSources::Files(sources)
583            },
584            PythonScanSourceInput::Buffer(buffer) => {
585                let mut sources = Vec::with_capacity(num_items);
586                sources.push(buffer);
587                MutableSources::Buffers(sources)
588            },
589        };
590
591        for source in iter {
592            match (&mut sources, source?) {
593                (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
594                (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
595                (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
596                _ => {
597                    return Err(PyTypeError::new_err(
598                        "Cannot combine in-memory bytes, paths and files for scan sources",
599                    ));
600                },
601            }
602        }
603
604        Ok(Wrap(match sources {
605            MutableSources::Paths(i) => ScanSources::Paths(i.into()),
606            MutableSources::Files(i) => ScanSources::Files(i.into()),
607            MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
608        }))
609    }
610}
611
612impl<'py> IntoPyObject<'py> for Wrap<&Schema> {
613    type Target = PyDict;
614    type Output = Bound<'py, Self::Target>;
615    type Error = PyErr;
616
617    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
618        let dict = PyDict::new(py);
619        self.0
620            .iter()
621            .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
622        Ok(dict)
623    }
624}
625
/// An owned reference to an arbitrary Python object.
///
/// The trait impls in this file delegate cloning, hashing, equality and display to
/// the Python object, acquiring the GIL where needed.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The wrapped Python object.
    pub inner: PyObject,
}
631
632impl Clone for ObjectValue {
633    fn clone(&self) -> Self {
634        Python::with_gil(|py| Self {
635            inner: self.inner.clone_ref(py),
636        })
637    }
638}
639
640impl Hash for ObjectValue {
641    fn hash<H: Hasher>(&self, state: &mut H) {
642        let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
643        state.write_isize(h)
644    }
645}
646
// NOTE(review): equality is delegated to Python's `==` (see `PartialEq` below), which
// is not guaranteed to be reflexive for every Python object — confirm this is acceptable.
impl Eq for ObjectValue {}
648
649impl PartialEq for ObjectValue {
650    fn eq(&self, other: &Self) -> bool {
651        Python::with_gil(|py| {
652            match self
653                .inner
654                .bind(py)
655                .rich_compare(other.inner.bind(py), CompareOp::Eq)
656            {
657                Ok(result) => result.is_truthy().unwrap(),
658                Err(_) => false,
659            }
660        })
661    }
662}
663
664impl TotalEq for ObjectValue {
665    fn tot_eq(&self, other: &Self) -> bool {
666        self == other
667    }
668}
669
670impl TotalHash for ObjectValue {
671    fn tot_hash<H>(&self, state: &mut H)
672    where
673        H: Hasher,
674    {
675        self.hash(state);
676    }
677}
678
679impl Display for ObjectValue {
680    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
681        write!(f, "{}", self.inner)
682    }
683}
684
/// Registers `ObjectValue` as a Polars object type under the name `"object"`.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    fn type_name() -> &'static str {
        "object"
    }
}
691
692impl From<PyObject> for ObjectValue {
693    fn from(p: PyObject) -> Self {
694        Self { inner: p }
695    }
696}
697
698impl<'py> FromPyObject<'py> for ObjectValue {
699    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
700        Ok(ObjectValue {
701            inner: ob.to_owned().unbind(),
702        })
703    }
704}
705
/// Reinterprets a type-erased Polars object reference as an [`ObjectValue`] reference.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // SAFETY: not verified here. The cast discards the trait-object metadata and
        // assumes the concrete type behind `val` is `ObjectValue`; upholding that is the
        // caller's responsibility (see the safety note on this impl).
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
715
716impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
717    type Target = PyAny;
718    type Output = Borrowed<'a, 'py, Self::Target>;
719    type Error = std::convert::Infallible;
720
721    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
722        Ok(self.inner.bind_borrowed(py))
723    }
724}
725
726impl Default for ObjectValue {
727    fn default() -> Self {
728        Python::with_gil(|py| ObjectValue { inner: py.None() })
729    }
730}
731
732impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
733    fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
734        let seq = obj.downcast::<PySequence>()?;
735        let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
736        for item in seq.try_iter()? {
737            v.push(item?.extract::<T>()?);
738        }
739        Ok(Wrap(v))
740    }
741}
742
/// Parses the asof join `strategy` from a Python string.
///
/// Accepts `"backward"`, `"forward"` and `"nearest"`; anything else is a `ValueError`.
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
759
760impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
761    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
762        let parsed = match &*(ob.extract::<PyBackedStr>()?) {
763            "linear" => InterpolationMethod::Linear,
764            "nearest" => InterpolationMethod::Nearest,
765            v => {
766                return Err(PyValueError::new_err(format!(
767                    "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
768                )));
769            },
770        };
771        Ok(Wrap(parsed))
772    }
773}
774
/// Parses the avro `compression` from a Python string.
///
/// `"uncompressed"` maps to `None`; `"snappy"` and `"deflate"` map to the matching
/// codec; anything else is a `ValueError`.
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
791
792impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
793    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
794        let parsed = match &*ob.extract::<PyBackedStr>()? {
795            "physical" => CategoricalOrdering::Physical,
796            "lexical" => CategoricalOrdering::Lexical,
797            v => {
798                return Err(PyValueError::new_err(format!(
799                    "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
800                )));
801            },
802        };
803        Ok(Wrap(parsed))
804    }
805}
806
807impl<'py> FromPyObject<'py> for Wrap<StartBy> {
808    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
809        let parsed = match &*ob.extract::<PyBackedStr>()? {
810            "window" => StartBy::WindowBound,
811            "datapoint" => StartBy::DataPoint,
812            "monday" => StartBy::Monday,
813            "tuesday" => StartBy::Tuesday,
814            "wednesday" => StartBy::Wednesday,
815            "thursday" => StartBy::Thursday,
816            "friday" => StartBy::Friday,
817            "saturday" => StartBy::Saturday,
818            "sunday" => StartBy::Sunday,
819            v => {
820                return Err(PyValueError::new_err(format!(
821                    "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
822                )));
823            },
824        };
825        Ok(Wrap(parsed))
826    }
827}
828
829impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
830    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
831        let parsed = match &*ob.extract::<PyBackedStr>()? {
832            "left" => ClosedWindow::Left,
833            "right" => ClosedWindow::Right,
834            "both" => ClosedWindow::Both,
835            "none" => ClosedWindow::None,
836            v => {
837                return Err(PyValueError::new_err(format!(
838                    "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
839                )));
840            },
841        };
842        Ok(Wrap(parsed))
843    }
844}
845
846impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
847    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
848        let parsed = match &*ob.extract::<PyBackedStr>()? {
849            "half_to_even" => RoundMode::HalfToEven,
850            "half_away_from_zero" => RoundMode::HalfAwayFromZero,
851            v => {
852                return Err(PyValueError::new_err(format!(
853                    "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
854                )));
855            },
856        };
857        Ok(Wrap(parsed))
858    }
859}
860
/// Extracts a [`CsvEncoding`] from the Python csv `encoding` string.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let encoding: PyResult<CsvEncoding> = match &*name {
            "utf8" => Ok(CsvEncoding::Utf8),
            "utf8-lossy" => Ok(CsvEncoding::LossyUtf8),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        };
        encoding.map(Wrap)
    }
}
876
/// Extracts an optional [`IpcCompression`] from the Python ipc `compression` string.
///
/// `"uncompressed"` maps to `None`; unrecognized strings produce a Python `ValueError`.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let compression: PyResult<Option<IpcCompression>> = match &*name {
            "uncompressed" => Ok(None),
            "lz4" => Ok(Some(IpcCompression::LZ4)),
            "zstd" => Ok(Some(IpcCompression::ZSTD)),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        };
        compression.map(Wrap)
    }
}
893
894impl<'py> FromPyObject<'py> for Wrap<JoinType> {
895    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
896        let parsed = match &*ob.extract::<PyBackedStr>()? {
897            "inner" => JoinType::Inner,
898            "left" => JoinType::Left,
899            "right" => JoinType::Right,
900            "full" => JoinType::Full,
901            "semi" => JoinType::Semi,
902            "anti" => JoinType::Anti,
903            #[cfg(feature = "cross_join")]
904            "cross" => JoinType::Cross,
905            v => {
906                return Err(PyValueError::new_err(format!(
907                    "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
908                )));
909            },
910        };
911        Ok(Wrap(parsed))
912    }
913}
914
915impl<'py> FromPyObject<'py> for Wrap<Label> {
916    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
917        let parsed = match &*ob.extract::<PyBackedStr>()? {
918            "left" => Label::Left,
919            "right" => Label::Right,
920            "datapoint" => Label::DataPoint,
921            v => {
922                return Err(PyValueError::new_err(format!(
923                    "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
924                )));
925            },
926        };
927        Ok(Wrap(parsed))
928    }
929}
930
931impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
932    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
933        let parsed = match &*ob.extract::<PyBackedStr>()? {
934            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
935            "max_width" => ListToStructWidthStrategy::MaxWidth,
936            v => {
937                return Err(PyValueError::new_err(format!(
938                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
939                )));
940            },
941        };
942        Ok(Wrap(parsed))
943    }
944}
945
946impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
947    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
948        let parsed = match &*ob.extract::<PyBackedStr>()? {
949            "null" => NonExistent::Null,
950            "raise" => NonExistent::Raise,
951            v => {
952                return Err(PyValueError::new_err(format!(
953                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
954                )));
955            },
956        };
957        Ok(Wrap(parsed))
958    }
959}
960
961impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
962    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
963        let parsed = match &*ob.extract::<PyBackedStr>()? {
964            "drop" => NullBehavior::Drop,
965            "ignore" => NullBehavior::Ignore,
966            v => {
967                return Err(PyValueError::new_err(format!(
968                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
969                )));
970            },
971        };
972        Ok(Wrap(parsed))
973    }
974}
975
976impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
977    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
978        let parsed = match &*ob.extract::<PyBackedStr>()? {
979            "ignore" => NullStrategy::Ignore,
980            "propagate" => NullStrategy::Propagate,
981            v => {
982                return Err(PyValueError::new_err(format!(
983                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
984                )));
985            },
986        };
987        Ok(Wrap(parsed))
988    }
989}
990
/// Extracts a [`ParallelStrategy`] from the Python `parallel` string.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let strategy: PyResult<ParallelStrategy> = match &*name {
            "auto" => Ok(ParallelStrategy::Auto),
            "columns" => Ok(ParallelStrategy::Columns),
            "row_groups" => Ok(ParallelStrategy::RowGroups),
            "prefiltered" => Ok(ParallelStrategy::Prefiltered),
            "none" => Ok(ParallelStrategy::None),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        };
        strategy.map(Wrap)
    }
}
1009
1010impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1011    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1012        let parsed = match &*ob.extract::<PyBackedStr>()? {
1013            "fortran" => IndexOrder::Fortran,
1014            "c" => IndexOrder::C,
1015            v => {
1016                return Err(PyValueError::new_err(format!(
1017                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
1018                )));
1019            },
1020        };
1021        Ok(Wrap(parsed))
1022    }
1023}
1024
1025impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1026    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1027        let parsed = match &*ob.extract::<PyBackedStr>()? {
1028            "lower" => QuantileMethod::Lower,
1029            "higher" => QuantileMethod::Higher,
1030            "nearest" => QuantileMethod::Nearest,
1031            "linear" => QuantileMethod::Linear,
1032            "midpoint" => QuantileMethod::Midpoint,
1033            "equiprobable" => QuantileMethod::Equiprobable,
1034            v => {
1035                return Err(PyValueError::new_err(format!(
1036                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1037                )));
1038            },
1039        };
1040        Ok(Wrap(parsed))
1041    }
1042}
1043
1044impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1045    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1046        let parsed = match &*ob.extract::<PyBackedStr>()? {
1047            "min" => RankMethod::Min,
1048            "max" => RankMethod::Max,
1049            "average" => RankMethod::Average,
1050            "dense" => RankMethod::Dense,
1051            "ordinal" => RankMethod::Ordinal,
1052            "random" => RankMethod::Random,
1053            v => {
1054                return Err(PyValueError::new_err(format!(
1055                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1056                )));
1057            },
1058        };
1059        Ok(Wrap(parsed))
1060    }
1061}
1062
1063impl<'py> FromPyObject<'py> for Wrap<Roll> {
1064    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1065        let parsed = match &*ob.extract::<PyBackedStr>()? {
1066            "raise" => Roll::Raise,
1067            "forward" => Roll::Forward,
1068            "backward" => Roll::Backward,
1069            v => {
1070                return Err(PyValueError::new_err(format!(
1071                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1072                )));
1073            },
1074        };
1075        Ok(Wrap(parsed))
1076    }
1077}
1078
1079impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1080    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1081        let parsed = match &*ob.extract::<PyBackedStr>()? {
1082            "ns" => TimeUnit::Nanoseconds,
1083            "us" => TimeUnit::Microseconds,
1084            "ms" => TimeUnit::Milliseconds,
1085            v => {
1086                return Err(PyValueError::new_err(format!(
1087                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1088                )));
1089            },
1090        };
1091        Ok(Wrap(parsed))
1092    }
1093}
1094
1095impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1096    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1097        let parsed = match &*ob.extract::<PyBackedStr>()? {
1098            "first" => UniqueKeepStrategy::First,
1099            "last" => UniqueKeepStrategy::Last,
1100            "none" => UniqueKeepStrategy::None,
1101            "any" => UniqueKeepStrategy::Any,
1102            v => {
1103                return Err(PyValueError::new_err(format!(
1104                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1105                )));
1106            },
1107        };
1108        Ok(Wrap(parsed))
1109    }
1110}
1111
/// Extracts a [`SearchSortedSide`] from the Python sorted `side` string.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let side: PyResult<SearchSortedSide> = match &*name {
            "any" => Ok(SearchSortedSide::Any),
            "left" => Ok(SearchSortedSide::Left),
            "right" => Ok(SearchSortedSide::Right),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        };
        side.map(Wrap)
    }
}
1128
1129impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1130    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1131        let parsed = match &*ob.extract::<PyBackedStr>()? {
1132            "both" => ClosedInterval::Both,
1133            "left" => ClosedInterval::Left,
1134            "right" => ClosedInterval::Right,
1135            "none" => ClosedInterval::None,
1136            v => {
1137                return Err(PyValueError::new_err(format!(
1138                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1139                )));
1140            },
1141        };
1142        Ok(Wrap(parsed))
1143    }
1144}
1145
1146impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1147    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1148        let parsed = match &*ob.extract::<PyBackedStr>()? {
1149            "group_to_rows" => WindowMapping::GroupsToRows,
1150            "join" => WindowMapping::Join,
1151            "explode" => WindowMapping::Explode,
1152            v => {
1153                return Err(PyValueError::new_err(format!(
1154                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1155                )));
1156            },
1157        };
1158        Ok(Wrap(parsed))
1159    }
1160}
1161
1162impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1163    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1164        let parsed = match &*ob.extract::<PyBackedStr>()? {
1165            "1:1" => JoinValidation::OneToOne,
1166            "1:m" => JoinValidation::OneToMany,
1167            "m:m" => JoinValidation::ManyToMany,
1168            "m:1" => JoinValidation::ManyToOne,
1169            v => {
1170                return Err(PyValueError::new_err(format!(
1171                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1172                )));
1173            },
1174        };
1175        Ok(Wrap(parsed))
1176    }
1177}
1178
1179impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1180    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1181        let parsed = match &*ob.extract::<PyBackedStr>()? {
1182            "none" => MaintainOrderJoin::None,
1183            "left" => MaintainOrderJoin::Left,
1184            "right" => MaintainOrderJoin::Right,
1185            "left_right" => MaintainOrderJoin::LeftRight,
1186            "right_left" => MaintainOrderJoin::RightLeft,
1187            v => {
1188                return Err(PyValueError::new_err(format!(
1189                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1190                )));
1191            },
1192        };
1193        Ok(Wrap(parsed))
1194    }
1195}
1196
/// Extracts a [`QuoteStyle`] from the Python `quote_style` string.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let quote_style: PyResult<QuoteStyle> = match &*name {
            "always" => Ok(QuoteStyle::Always),
            "necessary" => Ok(QuoteStyle::Necessary),
            "non_numeric" => Ok(QuoteStyle::NonNumeric),
            "never" => Ok(QuoteStyle::Never),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        };
        quote_style.map(Wrap)
    }
}
1214
/// Builds [`CloudOptions`] for `uri` from untyped string key/value pairs.
///
/// The pairs are handed to `CloudOptions::from_untyped_config` through a
/// `&mut dyn Iterator`; any error it returns is converted via [`PyPolarsErr`]
/// into a Python exception.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    let dyn_pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    match CloudOptions::from_untyped_config(uri, dyn_pairs) {
        Ok(options) => Ok(options),
        Err(err) => Err(PyPolarsErr::from(err).into()),
    }
}
1224
/// Extracts a [`SetOperation`] from a Python string naming the set operation.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let operation: PyResult<SetOperation> = match &*name {
            "union" => Ok(SetOperation::Union),
            "difference" => Ok(SetOperation::Difference),
            "intersection" => Ok(SetOperation::Intersection),
            "symmetric_difference" => Ok(SetOperation::SymmetricDifference),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        };
        operation.map(Wrap)
    }
}
1242
1243// Conversion from ScanCastOptions class from the Python side.
1244impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1245    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1246        if ob.is_none() {
1247            // Initialize the default ScanCastOptions from Python.
1248            static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1249
1250            let out = DEFAULT.get_or_try_init(ob.py(), || {
1251                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1252                    .unwrap()
1253                    .getattr("ScanCastOptions")
1254                    .unwrap()
1255                    .call_method0("_default")
1256                    .unwrap();
1257
1258                let out = Self::extract_bound(&ob)?;
1259
1260                // The default policy should match ERROR_ON_MISMATCH (but this can change).
1261                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1262
1263                PyResult::Ok(out)
1264            })?;
1265
1266            return Ok(out.clone());
1267        }
1268
1269        let py = ob.py();
1270
1271        let integer_upcast = match &*ob
1272            .getattr(intern!(py, "integer_cast"))?
1273            .extract::<PyBackedStr>()?
1274        {
1275            "upcast" => true,
1276            "forbid" => false,
1277            v => {
1278                return Err(PyValueError::new_err(format!(
1279                    "unknown option for integer_cast: {v}"
1280                )));
1281            },
1282        };
1283
1284        let mut float_upcast = false;
1285        let mut float_downcast = false;
1286
1287        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1288
1289        parse_multiple_options("float_cast", float_cast_object, |v| {
1290            match v {
1291                "forbid" => {},
1292                "upcast" => float_upcast = true,
1293                "downcast" => float_downcast = true,
1294                v => {
1295                    return Err(PyValueError::new_err(format!(
1296                        "unknown option for float_cast: {v}"
1297                    )));
1298                },
1299            }
1300
1301            Ok(())
1302        })?;
1303
1304        let mut datetime_nanoseconds_downcast = false;
1305        let mut datetime_convert_timezone = false;
1306
1307        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1308
1309        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1310            match v {
1311                "forbid" => {},
1312                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1313                "convert-timezone" => datetime_convert_timezone = true,
1314                v => {
1315                    return Err(PyValueError::new_err(format!(
1316                        "unknown option for datetime_cast: {v}"
1317                    )));
1318                },
1319            };
1320
1321            Ok(())
1322        })?;
1323
1324        let missing_struct_fields = match &*ob
1325            .getattr(intern!(py, "missing_struct_fields"))?
1326            .extract::<PyBackedStr>()?
1327        {
1328            "insert" => MissingColumnsPolicy::Insert,
1329            "raise" => MissingColumnsPolicy::Raise,
1330            v => {
1331                return Err(PyValueError::new_err(format!(
1332                    "unknown option for missing_struct_fields: {v}"
1333                )));
1334            },
1335        };
1336
1337        let extra_struct_fields = match &*ob
1338            .getattr(intern!(py, "extra_struct_fields"))?
1339            .extract::<PyBackedStr>()?
1340        {
1341            "ignore" => ExtraColumnsPolicy::Ignore,
1342            "raise" => ExtraColumnsPolicy::Raise,
1343            v => {
1344                return Err(PyValueError::new_err(format!(
1345                    "unknown option for extra_struct_fields: {v}"
1346                )));
1347            },
1348        };
1349
1350        return Ok(Wrap(CastColumnsPolicy {
1351            integer_upcast,
1352            float_upcast,
1353            float_downcast,
1354            datetime_nanoseconds_downcast,
1355            datetime_microseconds_downcast: false,
1356            datetime_convert_timezone,
1357            missing_struct_fields,
1358            extra_struct_fields,
1359        }));
1360
1361        fn parse_multiple_options(
1362            parameter_name: &'static str,
1363            py_object: Bound<'_, PyAny>,
1364            mut parser_func: impl FnMut(&str) -> PyResult<()>,
1365        ) -> PyResult<()> {
1366            if let Ok(v) = py_object.extract::<PyBackedStr>() {
1367                parser_func(&v)?;
1368            } else if let Ok(v) = py_object.try_iter() {
1369                for v in v {
1370                    parser_func(&v?.extract::<PyBackedStr>()?)?;
1371                }
1372            } else {
1373                return Err(PyValueError::new_err(format!(
1374                    "unknown type for {parameter_name}: {py_object}"
1375                )));
1376            }
1377
1378            Ok(())
1379        }
1380    }
1381}
1382
1383pub(crate) fn parse_fill_null_strategy(
1384    strategy: &str,
1385    limit: FillNullLimit,
1386) -> PyResult<FillNullStrategy> {
1387    let parsed = match strategy {
1388        "forward" => FillNullStrategy::Forward(limit),
1389        "backward" => FillNullStrategy::Backward(limit),
1390        "min" => FillNullStrategy::Min,
1391        "max" => FillNullStrategy::Max,
1392        "mean" => FillNullStrategy::Mean,
1393        "zero" => FillNullStrategy::Zero,
1394        "one" => FillNullStrategy::One,
1395        e => {
1396            return Err(PyValueError::new_err(format!(
1397                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1398            )));
1399        },
1400    };
1401    Ok(parsed)
1402}
1403
/// Maps a parquet `compression` name plus optional level to a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`.
///
/// # Errors
/// Returns a Python `ValueError` when the codec name is unknown, when the level does
/// not fit the integer type the codec's level constructor takes (instead of silently
/// wrapping it into a different, possibly valid, level), or when that constructor
/// rejects the level.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked narrowing: `lvl as u8` would turn e.g. 262 into 6.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid gzip compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: `lvl as u32` would wrap negative levels.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid brotli compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1445
1446pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1447where
1448    I: IntoIterator<Item = S>,
1449    S: AsRef<str>,
1450{
1451    container
1452        .into_iter()
1453        .map(|s| PlSmallStr::from_str(s.as_ref()))
1454        .collect()
1455}
1456
1457#[derive(Debug, Copy, Clone)]
1458pub struct PyCompatLevel(pub CompatLevel);
1459
1460impl<'py> FromPyObject<'py> for PyCompatLevel {
1461    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1462        Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1463            if let Ok(compat_level) = CompatLevel::with_level(level) {
1464                compat_level
1465            } else {
1466                return Err(PyValueError::new_err("invalid compat level"));
1467            }
1468        } else if let Ok(future) = ob.extract::<bool>() {
1469            if future {
1470                CompatLevel::newest()
1471            } else {
1472                CompatLevel::oldest()
1473            }
1474        } else {
1475            return Err(PyTypeError::new_err(
1476                "'compat_level' argument accepts int or bool",
1477            ));
1478        }))
1479    }
1480}
1481
/// Extracts a [`UnicodeForm`] from the Python `form` string.
///
/// Unrecognized strings produce a Python `ValueError` listing the accepted names.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let form: PyResult<UnicodeForm> = match &*name {
            "NFC" => Ok(UnicodeForm::NFC),
            "NFKC" => Ok(UnicodeForm::NFKC),
            "NFD" => Ok(UnicodeForm::NFD),
            "NFKD" => Ok(UnicodeForm::NFKD),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        };
        form.map(Wrap)
    }
}
1499
/// Extracts optional parquet key/value metadata from Python.
///
/// `None` stays `None`. A list of `(str, str)` pairs becomes static metadata; any other
/// Python object is passed to `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let converted = match Option::<Metadata>::extract_bound(ob)? {
            None => None,
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
        };
        Ok(Wrap(converted))
    }
}
1517
1518impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1519    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1520        let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1521
1522        let tz = tz.map(|x| x.0);
1523
1524        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1525    }
1526}
1527
1528impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1529    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1530        let parsed = match &*ob.extract::<PyBackedStr>()? {
1531            "upcast" => UpcastOrForbid::Upcast,
1532            "forbid" => UpcastOrForbid::Forbid,
1533            v => {
1534                return Err(PyValueError::new_err(format!(
1535                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1536                )));
1537            },
1538        };
1539        Ok(Wrap(parsed))
1540    }
1541}
1542
1543impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1544    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1545        let parsed = match &*ob.extract::<PyBackedStr>()? {
1546            "ignore" => ExtraColumnsPolicy::Ignore,
1547            "raise" => ExtraColumnsPolicy::Raise,
1548            v => {
1549                return Err(PyValueError::new_err(format!(
1550                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1551                )));
1552            },
1553        };
1554        Ok(Wrap(parsed))
1555    }
1556}
1557
1558impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1559    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1560        let parsed = match &*ob.extract::<PyBackedStr>()? {
1561            "insert" => MissingColumnsPolicy::Insert,
1562            "raise" => MissingColumnsPolicy::Raise,
1563            v => {
1564                return Err(PyValueError::new_err(format!(
1565                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1566                )));
1567            },
1568        };
1569        Ok(Wrap(parsed))
1570    }
1571}
1572
1573impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1574    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1575        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1576            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1577        }
1578
1579        let parsed = match &*ob.extract::<PyBackedStr>()? {
1580            "insert" => MissingColumnsPolicyOrExpr::Insert,
1581            "raise" => MissingColumnsPolicyOrExpr::Raise,
1582            v => {
1583                return Err(PyValueError::new_err(format!(
1584                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1585                )));
1586            },
1587        };
1588        Ok(Wrap(parsed))
1589    }
1590}
1591
1592impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1593    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1594        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1595
1596        Ok(Wrap(match &*deletion_file_type {
1597            "iceberg-position-delete" => {
1598                let dict: Bound<'_, PyDict> = ob.extract()?;
1599
1600                let mut out = PlIndexMap::new();
1601
1602                for (k, v) in dict
1603                    .try_iter()?
1604                    .zip(dict.call_method0("values")?.try_iter()?)
1605                {
1606                    let k: usize = k?.extract()?;
1607                    let v: Bound<'_, PyAny> = v?.extract()?;
1608
1609                    let files = v
1610                        .try_iter()?
1611                        .map(|x| {
1612                            x.and_then(|x| {
1613                                let x: String = x.extract()?;
1614                                Ok(x)
1615                            })
1616                        })
1617                        .collect::<PyResult<Arc<[String]>>>()?;
1618
1619                    if !files.is_empty() {
1620                        out.insert(k, files);
1621                    }
1622                }
1623
1624                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1625            },
1626
1627            v => {
1628                return Err(PyValueError::new_err(format!(
1629                    "unknown deletion file type: {v}"
1630                )));
1631            },
1632        }))
1633    }
1634}