polars_python/conversion/
mod.rs

1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::default_values::{
21    DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
22};
23use polars::prelude::deletion::DeletionFilesList;
24use polars::series::ops::NullBehavior;
25use polars_compute::decimal::dec128_verify_prec_scale;
26use polars_core::schema::iceberg::IcebergSchema;
27use polars_core::utils::arrow::array::Array;
28use polars_core::utils::arrow::types::NativeType;
29use polars_core::utils::materialize_dyn_int;
30use polars_lazy::prelude::*;
31#[cfg(feature = "parquet")]
32use polars_parquet::write::StatisticsOptions;
33use polars_plan::dsl::ScanSources;
34use polars_utils::mmap::MemSlice;
35use polars_utils::pl_str::PlSmallStr;
36use polars_utils::total_ord::{TotalEq, TotalHash};
37use pyo3::basic::CompareOp;
38use pyo3::exceptions::{PyTypeError, PyValueError};
39use pyo3::intern;
40use pyo3::prelude::*;
41use pyo3::pybacked::PyBackedStr;
42use pyo3::sync::PyOnceLock;
43use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
44
45use crate::error::PyPolarsErr;
46use crate::expr::PyExpr;
47use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
48use crate::interop::arrow::to_rust::field_to_rust_arrow;
49#[cfg(feature = "object")]
50use crate::object::OBJECT_NAME;
51use crate::prelude::*;
52use crate::py_modules::{pl_series, polars};
53use crate::series::PySeries;
54use crate::utils::to_py_err;
55use crate::{PyDataFrame, PyLazyFrame};
56
57/// # Safety
58/// Should only be implemented for transparent types
59pub(crate) unsafe trait Transparent {
60    type Target;
61}
62
63unsafe impl Transparent for PySeries {
64    type Target = Series;
65}
66
67unsafe impl<T> Transparent for Wrap<T> {
68    type Target = T;
69}
70
71unsafe impl<T: Transparent> Transparent for Option<T> {
72    type Target = Option<T::Target>;
73}
74
75pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
76    assert_eq!(size_of::<T>(), size_of::<T::Target>());
77    assert_eq!(align_of::<T>(), align_of::<T::Target>());
78    let len = input.len();
79    let cap = input.capacity();
80    let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
81    let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
82    let ptr: *mut T::Target = vec_ptr as *mut T::Target;
83    unsafe { Vec::from_raw_parts(ptr, len, cap) }
84}
85
86pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
87    reinterpret_vec(buf)
88}
89
/// Newtype around `T` with the same layout as `T` (`#[repr(transparent)]`).
///
/// The conversion trait impls in this file are written against `Wrap<…>`
/// rather than against the wrapped types directly.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
93
94impl<T> Clone for Wrap<T>
95where
96    T: Clone,
97{
98    fn clone(&self) -> Self {
99        Wrap(self.0.clone())
100    }
101}
102impl<T> From<T> for Wrap<T> {
103    fn from(t: T) -> Self {
104        Wrap(t)
105    }
106}
107
108// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
109pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
110    let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
111    Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
112}
113
114pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
115    let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
116    Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
117}
118
119pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
120    let s = obj.getattr(intern!(obj.py(), "_s"))?;
121    Ok(s.extract::<PySeries>()?.series.into_inner())
122}
123
124pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
125    let series = pl_series(py).bind(py);
126    let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
127    constructor.call1((s,))
128}
129
130impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
131    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
132        Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
133    }
134}
135
/// Extracts the `null_values` argument, trying three shapes in order:
/// a single string, a sequence of strings, then a sequence of string pairs.
///
/// # Errors
/// Returns an error when the object matches none of the three shapes.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // The single-string attempt must come first: a `str` is itself a
        // sequence, so the order of these attempts is significant.
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(values) = ob.extract::<Vec<PyBackedStr>>() {
            let values = values.into_iter().map(|x| (&*x).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let pairs = pairs
                .into_iter()
                .map(|(a, b)| ((&*a).into(), (&*b).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(pairs)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
159
160fn struct_dict<'a, 'py>(
161    py: Python<'py>,
162    vals: impl Iterator<Item = AnyValue<'a>>,
163    flds: &[Field],
164) -> PyResult<Bound<'py, PyDict>> {
165    let dict = PyDict::new(py);
166    flds.iter().zip(vals).try_for_each(|(fld, val)| {
167        dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
168    })?;
169    Ok(dict)
170}
171
172impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
173    type Target = PyAny;
174    type Output = Bound<'py, Self::Target>;
175    type Error = PyErr;
176
177    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
178        let pl = polars(py).bind(py);
179
180        match &self.0 {
181            DataType::Int8 => {
182                let class = pl.getattr(intern!(py, "Int8"))?;
183                class.call0()
184            },
185            DataType::Int16 => {
186                let class = pl.getattr(intern!(py, "Int16"))?;
187                class.call0()
188            },
189            DataType::Int32 => {
190                let class = pl.getattr(intern!(py, "Int32"))?;
191                class.call0()
192            },
193            DataType::Int64 => {
194                let class = pl.getattr(intern!(py, "Int64"))?;
195                class.call0()
196            },
197            DataType::UInt8 => {
198                let class = pl.getattr(intern!(py, "UInt8"))?;
199                class.call0()
200            },
201            DataType::UInt16 => {
202                let class = pl.getattr(intern!(py, "UInt16"))?;
203                class.call0()
204            },
205            DataType::UInt32 => {
206                let class = pl.getattr(intern!(py, "UInt32"))?;
207                class.call0()
208            },
209            DataType::UInt64 => {
210                let class = pl.getattr(intern!(py, "UInt64"))?;
211                class.call0()
212            },
213            DataType::UInt128 => {
214                let class = pl.getattr(intern!(py, "UInt128"))?;
215                class.call0()
216            },
217            DataType::Int128 => {
218                let class = pl.getattr(intern!(py, "Int128"))?;
219                class.call0()
220            },
221            DataType::Float32 => {
222                let class = pl.getattr(intern!(py, "Float32"))?;
223                class.call0()
224            },
225            DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
226                let class = pl.getattr(intern!(py, "Float64"))?;
227                class.call0()
228            },
229            DataType::Decimal(precision, scale) => {
230                let class = pl.getattr(intern!(py, "Decimal"))?;
231                let args = (*precision, *scale);
232                class.call1(args)
233            },
234            DataType::Boolean => {
235                let class = pl.getattr(intern!(py, "Boolean"))?;
236                class.call0()
237            },
238            DataType::String | DataType::Unknown(UnknownKind::Str) => {
239                let class = pl.getattr(intern!(py, "String"))?;
240                class.call0()
241            },
242            DataType::Binary => {
243                let class = pl.getattr(intern!(py, "Binary"))?;
244                class.call0()
245            },
246            DataType::Array(inner, size) => {
247                let class = pl.getattr(intern!(py, "Array"))?;
248                let inner = Wrap(*inner.clone());
249                let args = (&inner, *size);
250                class.call1(args)
251            },
252            DataType::List(inner) => {
253                let class = pl.getattr(intern!(py, "List"))?;
254                let inner = Wrap(*inner.clone());
255                class.call1((&inner,))
256            },
257            DataType::Date => {
258                let class = pl.getattr(intern!(py, "Date"))?;
259                class.call0()
260            },
261            DataType::Datetime(tu, tz) => {
262                let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
263                datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
264            },
265            DataType::Duration(tu) => {
266                let duration_class = pl.getattr(intern!(py, "Duration"))?;
267                duration_class.call1((tu.to_ascii(),))
268            },
269            #[cfg(feature = "object")]
270            DataType::Object(_) => {
271                let class = pl.getattr(intern!(py, "Object"))?;
272                class.call0()
273            },
274            DataType::Categorical(cats, _) => {
275                let categories_class = pl.getattr(intern!(py, "Categories"))?;
276                let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
277                let categories = categories_class
278                    .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
279                let kwargs = [("categories", categories)];
280                categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
281            },
282            DataType::Enum(_, mapping) => {
283                let categories = unsafe {
284                    StringChunked::from_chunks(
285                        PlSmallStr::from_static("category"),
286                        vec![mapping.to_arrow(true)],
287                    )
288                };
289                let class = pl.getattr(intern!(py, "Enum"))?;
290                let series = to_series(py, categories.into_series().into())?;
291                class.call1((series,))
292            },
293            DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
294            DataType::Struct(fields) => {
295                let field_class = pl.getattr(intern!(py, "Field"))?;
296                let iter = fields.iter().map(|fld| {
297                    let name = fld.name().as_str();
298                    let dtype = Wrap(fld.dtype().clone());
299                    field_class.call1((name, &dtype)).unwrap()
300                });
301                let fields = PyList::new(py, iter)?;
302                let struct_class = pl.getattr(intern!(py, "Struct"))?;
303                struct_class.call1((fields,))
304            },
305            DataType::Null => {
306                let class = pl.getattr(intern!(py, "Null"))?;
307                class.call0()
308            },
309            DataType::Unknown(UnknownKind::Int(v)) => {
310                Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
311            },
312            DataType::Unknown(_) => {
313                let class = pl.getattr(intern!(py, "Unknown"))?;
314                class.call0()
315            },
316            DataType::BinaryOffset => {
317                unimplemented!()
318            },
319        }
320    }
321}
322
323impl<'py> FromPyObject<'py> for Wrap<Field> {
324    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
325        let py = ob.py();
326        let name = ob
327            .getattr(intern!(py, "name"))?
328            .str()?
329            .extract::<PyBackedStr>()?;
330        let dtype = ob
331            .getattr(intern!(py, "dtype"))?
332            .extract::<Wrap<DataType>>()?;
333        Ok(Wrap(Field::new((&*name).into(), dtype.0)))
334    }
335}
336
337impl<'py> FromPyObject<'py> for Wrap<DataType> {
338    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
339        let py = ob.py();
340        let type_name = ob.get_type().qualname()?.to_string();
341
342        let dtype = match &*type_name {
343            "DataTypeClass" => {
344                // just the class, not an object
345                let name = ob
346                    .getattr(intern!(py, "__name__"))?
347                    .str()?
348                    .extract::<PyBackedStr>()?;
349                match &*name {
350                    "Int8" => DataType::Int8,
351                    "Int16" => DataType::Int16,
352                    "Int32" => DataType::Int32,
353                    "Int64" => DataType::Int64,
354                    "Int128" => DataType::Int128,
355                    "UInt8" => DataType::UInt8,
356                    "UInt16" => DataType::UInt16,
357                    "UInt32" => DataType::UInt32,
358                    "UInt64" => DataType::UInt64,
359                    "UInt128" => DataType::UInt128,
360                    "Float32" => DataType::Float32,
361                    "Float64" => DataType::Float64,
362                    "Boolean" => DataType::Boolean,
363                    "String" => DataType::String,
364                    "Binary" => DataType::Binary,
365                    "Categorical" => DataType::from_categories(Categories::global()),
366                    "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
367                    "Date" => DataType::Date,
368                    "Time" => DataType::Time,
369                    "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
370                    "Duration" => DataType::Duration(TimeUnit::Microseconds),
371                    "List" => DataType::List(Box::new(DataType::Null)),
372                    "Array" => DataType::Array(Box::new(DataType::Null), 0),
373                    "Struct" => DataType::Struct(vec![]),
374                    "Null" => DataType::Null,
375                    #[cfg(feature = "object")]
376                    "Object" => DataType::Object(OBJECT_NAME),
377                    "Unknown" => DataType::Unknown(Default::default()),
378                    "Decimal" => {
379                        return Err(PyTypeError::new_err(
380                            "Decimal without precision/scale set is not a valid Polars datatype",
381                        ));
382                    },
383                    dt => {
384                        return Err(PyTypeError::new_err(format!(
385                            "'{dt}' is not a Polars data type",
386                        )));
387                    },
388                }
389            },
390            "Int8" => DataType::Int8,
391            "Int16" => DataType::Int16,
392            "Int32" => DataType::Int32,
393            "Int64" => DataType::Int64,
394            "Int128" => DataType::Int128,
395            "UInt8" => DataType::UInt8,
396            "UInt16" => DataType::UInt16,
397            "UInt32" => DataType::UInt32,
398            "UInt64" => DataType::UInt64,
399            "UInt128" => DataType::UInt128,
400            "Float32" => DataType::Float32,
401            "Float64" => DataType::Float64,
402            "Boolean" => DataType::Boolean,
403            "String" => DataType::String,
404            "Binary" => DataType::Binary,
405            "Categorical" => {
406                let categories = ob.getattr(intern!(py, "categories")).unwrap();
407                let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
408                let py_categories = py_categories.extract::<PyCategories>()?;
409                DataType::from_categories(py_categories.categories().clone())
410            },
411            "Enum" => {
412                let categories = ob.getattr(intern!(py, "categories")).unwrap();
413                let s = get_series(&categories.as_borrowed())?;
414                let ca = s.str().map_err(PyPolarsErr::from)?;
415                let categories = ca.downcast_iter().next().unwrap().clone();
416                assert!(!categories.has_nulls());
417                DataType::from_frozen_categories(
418                    FrozenCategories::new(categories.values_iter()).unwrap(),
419                )
420            },
421            "Date" => DataType::Date,
422            "Time" => DataType::Time,
423            "Datetime" => {
424                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
425                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
426                let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
427                let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
428                DataType::Datetime(
429                    time_unit,
430                    TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
431                )
432            },
433            "Duration" => {
434                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
435                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
436                DataType::Duration(time_unit)
437            },
438            "Decimal" => {
439                let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
440                let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
441                dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;
442                DataType::Decimal(precision, scale)
443            },
444            "List" => {
445                let inner = ob.getattr(intern!(py, "inner")).unwrap();
446                let inner = inner.extract::<Wrap<DataType>>()?;
447                DataType::List(Box::new(inner.0))
448            },
449            "Array" => {
450                let inner = ob.getattr(intern!(py, "inner")).unwrap();
451                let size = ob.getattr(intern!(py, "size")).unwrap();
452                let inner = inner.extract::<Wrap<DataType>>()?;
453                let size = size.extract::<usize>()?;
454                DataType::Array(Box::new(inner.0), size)
455            },
456            "Struct" => {
457                let fields = ob.getattr(intern!(py, "fields"))?;
458                let fields = fields
459                    .extract::<Vec<Wrap<Field>>>()?
460                    .into_iter()
461                    .map(|f| f.0)
462                    .collect::<Vec<Field>>();
463                DataType::Struct(fields)
464            },
465            "Null" => DataType::Null,
466            #[cfg(feature = "object")]
467            "Object" => DataType::Object(OBJECT_NAME),
468            "Unknown" => DataType::Unknown(Default::default()),
469            dt => {
470                return Err(PyTypeError::new_err(format!(
471                    "'{dt}' is not a Polars data type",
472                )));
473            },
474        };
475        Ok(Wrap(dtype))
476    }
477}
478
/// Ordering of categorical values as exchanged with Python.
///
/// `Lexical` is the only variant.
enum CategoricalOrdering {
    /// Exchanged with Python as the string `"lexical"`.
    Lexical,
}
482
483impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
484    type Target = PyString;
485    type Output = Bound<'py, Self::Target>;
486    type Error = Infallible;
487
488    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
489        "lexical".into_pyobject(py)
490    }
491}
492
493impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
494    type Target = PyString;
495    type Output = Bound<'py, Self::Target>;
496    type Error = Infallible;
497
498    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
499        self.0.to_ascii().into_pyobject(py)
500    }
501}
502
/// Extracts parquet statistics options from a Python dict of `str -> bool`.
///
/// Starts from `StatisticsOptions::empty()` and sets one flag per dict entry.
/// Accepted keys: `"min"`, `"max"`, `"distinct_count"`, `"null_count"`.
///
/// # Errors
/// Fails if the object is not a dict, if a key is not a string, if a value is
/// not a bool, or with `PyTypeError` if a key is not one of the accepted names.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();

        let dict = ob.downcast::<PyDict>()?;
        for (key, val) in dict.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag named by the key, then assign it once below.
            let flag = match key.as_ref() {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
529
530impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
531    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
532        let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
533        let vals = reinterpret_vec(vals);
534        Ok(Wrap(Row(vals)))
535    }
536}
537
538impl<'py> FromPyObject<'py> for Wrap<Schema> {
539    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
540        let dict = ob.downcast::<PyDict>()?;
541
542        Ok(Wrap(
543            dict.iter()
544                .map(|(key, val)| {
545                    let key = key.extract::<PyBackedStr>()?;
546                    let val = val.extract::<Wrap<DataType>>()?;
547
548                    Ok(Field::new((&*key).into(), val.0))
549                })
550                .collect::<PyResult<Schema>>()?,
551        ))
552    }
553}
554
555impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
556    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
557        let py = ob.py();
558
559        let pyarrow_schema_cls = py
560            .import(intern!(py, "pyarrow"))?
561            .getattr(intern!(py, "Schema"))?;
562
563        if ob.is_none() {
564            return Err(PyValueError::new_err("arrow_schema() returned None").into());
565        }
566
567        let schema_cls = ob.getattr(intern!(py, "__class__"))?;
568
569        if !schema_cls.is(&pyarrow_schema_cls) {
570            return Err(PyTypeError::new_err(format!(
571                "expected pyarrow.Schema, got: {schema_cls}"
572            )));
573        }
574
575        let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
576
577        let mut last_err = None;
578
579        let schema =
580            ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
581                Some(Ok(v)) => Some(v),
582                Some(Err(e)) => {
583                    last_err = Some(e);
584                    None
585                },
586                None => None,
587            }))
588            .map_err(to_py_err)?;
589
590        if let Some(last_err) = last_err {
591            return Err(last_err.into());
592        }
593
594        Ok(Wrap(schema))
595    }
596}
597
598impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
599    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
600        let list = ob.downcast::<PyList>()?.to_owned();
601
602        if list.is_empty() {
603            return Ok(Wrap(ScanSources::default()));
604        }
605
606        enum MutableSources {
607            Paths(Vec<PlPath>),
608            Files(Vec<File>),
609            Buffers(Vec<MemSlice>),
610        }
611
612        let num_items = list.len();
613        let mut iter = list
614            .into_iter()
615            .map(|val| get_python_scan_source_input(val.unbind(), false));
616
617        let Some(first) = iter.next() else {
618            return Ok(Wrap(ScanSources::default()));
619        };
620
621        let mut sources = match first? {
622            PythonScanSourceInput::Path(path) => {
623                let mut sources = Vec::with_capacity(num_items);
624                sources.push(path);
625                MutableSources::Paths(sources)
626            },
627            PythonScanSourceInput::File(file) => {
628                let mut sources = Vec::with_capacity(num_items);
629                sources.push(file.into());
630                MutableSources::Files(sources)
631            },
632            PythonScanSourceInput::Buffer(buffer) => {
633                let mut sources = Vec::with_capacity(num_items);
634                sources.push(buffer);
635                MutableSources::Buffers(sources)
636            },
637        };
638
639        for source in iter {
640            match (&mut sources, source?) {
641                (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
642                (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
643                (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
644                _ => {
645                    return Err(PyTypeError::new_err(
646                        "Cannot combine in-memory bytes, paths and files for scan sources",
647                    ));
648                },
649            }
650        }
651
652        Ok(Wrap(match sources {
653            MutableSources::Paths(i) => ScanSources::Paths(i.into()),
654            MutableSources::Files(i) => ScanSources::Files(i.into()),
655            MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
656        }))
657    }
658}
659
660impl<'py> IntoPyObject<'py> for Wrap<Schema> {
661    type Target = PyDict;
662    type Output = Bound<'py, Self::Target>;
663    type Error = PyErr;
664
665    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
666        let dict = PyDict::new(py);
667        self.0
668            .iter()
669            .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
670        Ok(dict)
671    }
672}
673
674#[derive(Debug)]
675#[repr(transparent)]
676pub struct ObjectValue {
677    pub inner: Py<PyAny>,
678}
679
680impl Clone for ObjectValue {
681    fn clone(&self) -> Self {
682        Python::attach(|py| Self {
683            inner: self.inner.clone_ref(py),
684        })
685    }
686}
687
688impl Hash for ObjectValue {
689    fn hash<H: Hasher>(&self, state: &mut H) {
690        let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));
691        state.write_isize(h)
692    }
693}
694
695impl Eq for ObjectValue {}
696
697impl PartialEq for ObjectValue {
698    fn eq(&self, other: &Self) -> bool {
699        Python::attach(|py| {
700            match self
701                .inner
702                .bind(py)
703                .rich_compare(other.inner.bind(py), CompareOp::Eq)
704            {
705                Ok(result) => result.is_truthy().unwrap(),
706                Err(_) => false,
707            }
708        })
709    }
710}
711
712impl TotalEq for ObjectValue {
713    fn tot_eq(&self, other: &Self) -> bool {
714        self == other
715    }
716}
717
718impl TotalHash for ObjectValue {
719    fn tot_hash<H>(&self, state: &mut H)
720    where
721        H: Hasher,
722    {
723        self.hash(state);
724    }
725}
726
727impl Display for ObjectValue {
728    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
729        write!(f, "{}", self.inner)
730    }
731}
732
/// Registers `ObjectValue` as a polars object type.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// The type name reported for this object type: `"object"`.
    fn type_name() -> &'static str {
        "object"
    }
}
739
740impl From<Py<PyAny>> for ObjectValue {
741    fn from(p: Py<PyAny>) -> Self {
742        Self { inner: p }
743    }
744}
745
746impl<'py> FromPyObject<'py> for ObjectValue {
747    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
748        Ok(ObjectValue {
749            inner: ob.to_owned().unbind(),
750        })
751    }
752}
753
/// Reinterprets a `&dyn PolarsObjectSafe` as a `&ObjectValue` with no runtime
/// type check.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let erased = val as *const dyn PolarsObjectSafe;
        // SAFETY: not verified here — the caller guarantees that the value
        // behind `val` really is an `ObjectValue` (see the note above).
        unsafe { &*erased.cast::<ObjectValue>() }
    }
}
763
764impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
765    type Target = PyAny;
766    type Output = Borrowed<'a, 'py, Self::Target>;
767    type Error = std::convert::Infallible;
768
769    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
770        Ok(self.inner.bind_borrowed(py))
771    }
772}
773
774impl Default for ObjectValue {
775    fn default() -> Self {
776        Python::attach(|py| ObjectValue { inner: py.None() })
777    }
778}
779
780impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
781    fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
782        let seq = obj.downcast::<PySequence>()?;
783        let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
784        for item in seq.try_iter()? {
785            v.push(item?.extract::<T>()?);
786        }
787        Ok(Wrap(v))
788    }
789}
790
/// Parses the asof-join `strategy` argument from a Python string.
///
/// # Errors
/// `PyValueError` for any string other than `"backward"`, `"forward"` or
/// `"nearest"`.
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let value = ob.extract::<PyBackedStr>()?;
        let strategy = match &*value {
            "backward" => Ok(AsofStrategy::Backward),
            "forward" => Ok(AsofStrategy::Forward),
            "nearest" => Ok(AsofStrategy::Nearest),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        };
        strategy.map(Wrap)
    }
}
807
808impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
809    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
810        let parsed = match &*(ob.extract::<PyBackedStr>()?) {
811            "linear" => InterpolationMethod::Linear,
812            "nearest" => InterpolationMethod::Nearest,
813            v => {
814                return Err(PyValueError::new_err(format!(
815                    "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
816                )));
817            },
818        };
819        Ok(Wrap(parsed))
820    }
821}
822
/// Parses the avro `compression` argument from a Python string.
///
/// `"uncompressed"` maps to `None`; `"snappy"` and `"deflate"` map to the
/// corresponding `AvroCompression` variant.
///
/// # Errors
/// `PyValueError` for any other string.
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let value = ob.extract::<PyBackedStr>()?;
        let compression = match &*value {
            "uncompressed" => Ok(None),
            "snappy" => Ok(Some(AvroCompression::Snappy)),
            "deflate" => Ok(Some(AvroCompression::Deflate)),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        };
        compression.map(Wrap)
    }
}
839
840impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
841    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
842        let parsed = match &*ob.extract::<PyBackedStr>()? {
843            "lexical" => CategoricalOrdering::Lexical,
844            "physical" => {
845                polars_warn!(
846                    Deprecation,
847                    "physical ordering is deprecated, will use lexical ordering instead"
848                );
849                CategoricalOrdering::Lexical
850            },
851            v => {
852                return Err(PyValueError::new_err(format!(
853                    "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
854                )));
855            },
856        };
857        Ok(Wrap(parsed))
858    }
859}
860
861impl<'py> FromPyObject<'py> for Wrap<StartBy> {
862    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
863        let parsed = match &*ob.extract::<PyBackedStr>()? {
864            "window" => StartBy::WindowBound,
865            "datapoint" => StartBy::DataPoint,
866            "monday" => StartBy::Monday,
867            "tuesday" => StartBy::Tuesday,
868            "wednesday" => StartBy::Wednesday,
869            "thursday" => StartBy::Thursday,
870            "friday" => StartBy::Friday,
871            "saturday" => StartBy::Saturday,
872            "sunday" => StartBy::Sunday,
873            v => {
874                return Err(PyValueError::new_err(format!(
875                    "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
876                )));
877            },
878        };
879        Ok(Wrap(parsed))
880    }
881}
882
883impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
884    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
885        let parsed = match &*ob.extract::<PyBackedStr>()? {
886            "left" => ClosedWindow::Left,
887            "right" => ClosedWindow::Right,
888            "both" => ClosedWindow::Both,
889            "none" => ClosedWindow::None,
890            v => {
891                return Err(PyValueError::new_err(format!(
892                    "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
893                )));
894            },
895        };
896        Ok(Wrap(parsed))
897    }
898}
899
900impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
901    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
902        let parsed = match &*ob.extract::<PyBackedStr>()? {
903            "half_to_even" => RoundMode::HalfToEven,
904            "half_away_from_zero" => RoundMode::HalfAwayFromZero,
905            v => {
906                return Err(PyValueError::new_err(format!(
907                    "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
908                )));
909            },
910        };
911        Ok(Wrap(parsed))
912    }
913}
914
/// Builds a `CsvEncoding` from its Python string name.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    /// Accepts `"utf8"` and `"utf8-lossy"`; any other string is rejected with a
    /// `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        let lookup = match v {
            "utf8" => Some(CsvEncoding::Utf8),
            "utf8-lossy" => Some(CsvEncoding::LossyUtf8),
            _ => None,
        };
        let Some(encoding) = lookup else {
            return Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            )));
        };
        Ok(Wrap(encoding))
    }
}
930
/// Builds an optional `IpcCompression` from its Python string name.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    /// `"uncompressed"` maps to `None`, `"lz4"` to LZ4 and `"zstd"` to ZSTD with
    /// its default level. Any other string is rejected with a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        Ok(Wrap(match v {
            "uncompressed" => None,
            "lz4" => Some(IpcCompression::LZ4),
            "zstd" => Some(IpcCompression::ZSTD(Default::default())),
            _ => {
                let message = format!(
                    "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
                );
                return Err(PyValueError::new_err(message));
            },
        }))
    }
}
947
948impl<'py> FromPyObject<'py> for Wrap<JoinType> {
949    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
950        let parsed = match &*ob.extract::<PyBackedStr>()? {
951            "inner" => JoinType::Inner,
952            "left" => JoinType::Left,
953            "right" => JoinType::Right,
954            "full" => JoinType::Full,
955            "semi" => JoinType::Semi,
956            "anti" => JoinType::Anti,
957            #[cfg(feature = "cross_join")]
958            "cross" => JoinType::Cross,
959            v => {
960                return Err(PyValueError::new_err(format!(
961                    "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
962                )));
963            },
964        };
965        Ok(Wrap(parsed))
966    }
967}
968
969impl<'py> FromPyObject<'py> for Wrap<Label> {
970    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
971        let parsed = match &*ob.extract::<PyBackedStr>()? {
972            "left" => Label::Left,
973            "right" => Label::Right,
974            "datapoint" => Label::DataPoint,
975            v => {
976                return Err(PyValueError::new_err(format!(
977                    "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
978                )));
979            },
980        };
981        Ok(Wrap(parsed))
982    }
983}
984
985impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
986    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
987        let parsed = match &*ob.extract::<PyBackedStr>()? {
988            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
989            "max_width" => ListToStructWidthStrategy::MaxWidth,
990            v => {
991                return Err(PyValueError::new_err(format!(
992                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
993                )));
994            },
995        };
996        Ok(Wrap(parsed))
997    }
998}
999
1000impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1001    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1002        let parsed = match &*ob.extract::<PyBackedStr>()? {
1003            "null" => NonExistent::Null,
1004            "raise" => NonExistent::Raise,
1005            v => {
1006                return Err(PyValueError::new_err(format!(
1007                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1008                )));
1009            },
1010        };
1011        Ok(Wrap(parsed))
1012    }
1013}
1014
1015impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1016    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1017        let parsed = match &*ob.extract::<PyBackedStr>()? {
1018            "drop" => NullBehavior::Drop,
1019            "ignore" => NullBehavior::Ignore,
1020            v => {
1021                return Err(PyValueError::new_err(format!(
1022                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1023                )));
1024            },
1025        };
1026        Ok(Wrap(parsed))
1027    }
1028}
1029
1030impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1031    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1032        let parsed = match &*ob.extract::<PyBackedStr>()? {
1033            "ignore" => NullStrategy::Ignore,
1034            "propagate" => NullStrategy::Propagate,
1035            v => {
1036                return Err(PyValueError::new_err(format!(
1037                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1038                )));
1039            },
1040        };
1041        Ok(Wrap(parsed))
1042    }
1043}
1044
/// Builds a parquet `ParallelStrategy` from its Python string name.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    /// Accepts `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"` and
    /// `"none"`; any other string is rejected with a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        Ok(Wrap(match v {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            _ => {
                let message = format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                );
                return Err(PyValueError::new_err(message));
            },
        }))
    }
}
1063
1064impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1065    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1066        let parsed = match &*ob.extract::<PyBackedStr>()? {
1067            "fortran" => IndexOrder::Fortran,
1068            "c" => IndexOrder::C,
1069            v => {
1070                return Err(PyValueError::new_err(format!(
1071                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
1072                )));
1073            },
1074        };
1075        Ok(Wrap(parsed))
1076    }
1077}
1078
1079impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1080    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1081        let parsed = match &*ob.extract::<PyBackedStr>()? {
1082            "lower" => QuantileMethod::Lower,
1083            "higher" => QuantileMethod::Higher,
1084            "nearest" => QuantileMethod::Nearest,
1085            "linear" => QuantileMethod::Linear,
1086            "midpoint" => QuantileMethod::Midpoint,
1087            "equiprobable" => QuantileMethod::Equiprobable,
1088            v => {
1089                return Err(PyValueError::new_err(format!(
1090                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1091                )));
1092            },
1093        };
1094        Ok(Wrap(parsed))
1095    }
1096}
1097
1098impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1099    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1100        let parsed = match &*ob.extract::<PyBackedStr>()? {
1101            "min" => RankMethod::Min,
1102            "max" => RankMethod::Max,
1103            "average" => RankMethod::Average,
1104            "dense" => RankMethod::Dense,
1105            "ordinal" => RankMethod::Ordinal,
1106            "random" => RankMethod::Random,
1107            v => {
1108                return Err(PyValueError::new_err(format!(
1109                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1110                )));
1111            },
1112        };
1113        Ok(Wrap(parsed))
1114    }
1115}
1116
1117impl<'py> FromPyObject<'py> for Wrap<RollingRankMethod> {
1118    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1119        let parsed = match &*ob.extract::<PyBackedStr>()? {
1120            "min" => RollingRankMethod::Min,
1121            "max" => RollingRankMethod::Max,
1122            "average" => RollingRankMethod::Average,
1123            "dense" => RollingRankMethod::Dense,
1124            "random" => RollingRankMethod::Random,
1125            v => {
1126                return Err(PyValueError::new_err(format!(
1127                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1128                )));
1129            },
1130        };
1131        Ok(Wrap(parsed))
1132    }
1133}
1134
1135impl<'py> FromPyObject<'py> for Wrap<Roll> {
1136    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1137        let parsed = match &*ob.extract::<PyBackedStr>()? {
1138            "raise" => Roll::Raise,
1139            "forward" => Roll::Forward,
1140            "backward" => Roll::Backward,
1141            v => {
1142                return Err(PyValueError::new_err(format!(
1143                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1144                )));
1145            },
1146        };
1147        Ok(Wrap(parsed))
1148    }
1149}
1150
1151impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1152    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1153        let parsed = match &*ob.extract::<PyBackedStr>()? {
1154            "ns" => TimeUnit::Nanoseconds,
1155            "us" => TimeUnit::Microseconds,
1156            "ms" => TimeUnit::Milliseconds,
1157            v => {
1158                return Err(PyValueError::new_err(format!(
1159                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1160                )));
1161            },
1162        };
1163        Ok(Wrap(parsed))
1164    }
1165}
1166
1167impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1168    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1169        let parsed = match &*ob.extract::<PyBackedStr>()? {
1170            "first" => UniqueKeepStrategy::First,
1171            "last" => UniqueKeepStrategy::Last,
1172            "none" => UniqueKeepStrategy::None,
1173            "any" => UniqueKeepStrategy::Any,
1174            v => {
1175                return Err(PyValueError::new_err(format!(
1176                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1177                )));
1178            },
1179        };
1180        Ok(Wrap(parsed))
1181    }
1182}
1183
/// Builds a `SearchSortedSide` from its Python string name.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    /// Accepts `"any"`, `"left"` and `"right"`; any other string is rejected
    /// with a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        let lookup = match v {
            "any" => Some(SearchSortedSide::Any),
            "left" => Some(SearchSortedSide::Left),
            "right" => Some(SearchSortedSide::Right),
            _ => None,
        };
        let Some(side) = lookup else {
            return Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            )));
        };
        Ok(Wrap(side))
    }
}
1200
1201impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1202    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1203        let parsed = match &*ob.extract::<PyBackedStr>()? {
1204            "both" => ClosedInterval::Both,
1205            "left" => ClosedInterval::Left,
1206            "right" => ClosedInterval::Right,
1207            "none" => ClosedInterval::None,
1208            v => {
1209                return Err(PyValueError::new_err(format!(
1210                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1211                )));
1212            },
1213        };
1214        Ok(Wrap(parsed))
1215    }
1216}
1217
1218impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1219    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1220        let parsed = match &*ob.extract::<PyBackedStr>()? {
1221            "group_to_rows" => WindowMapping::GroupsToRows,
1222            "join" => WindowMapping::Join,
1223            "explode" => WindowMapping::Explode,
1224            v => {
1225                return Err(PyValueError::new_err(format!(
1226                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1227                )));
1228            },
1229        };
1230        Ok(Wrap(parsed))
1231    }
1232}
1233
1234impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1235    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1236        let parsed = match &*ob.extract::<PyBackedStr>()? {
1237            "1:1" => JoinValidation::OneToOne,
1238            "1:m" => JoinValidation::OneToMany,
1239            "m:m" => JoinValidation::ManyToMany,
1240            "m:1" => JoinValidation::ManyToOne,
1241            v => {
1242                return Err(PyValueError::new_err(format!(
1243                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1244                )));
1245            },
1246        };
1247        Ok(Wrap(parsed))
1248    }
1249}
1250
1251impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1252    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1253        let parsed = match &*ob.extract::<PyBackedStr>()? {
1254            "none" => MaintainOrderJoin::None,
1255            "left" => MaintainOrderJoin::Left,
1256            "right" => MaintainOrderJoin::Right,
1257            "left_right" => MaintainOrderJoin::LeftRight,
1258            "right_left" => MaintainOrderJoin::RightLeft,
1259            v => {
1260                return Err(PyValueError::new_err(format!(
1261                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1262                )));
1263            },
1264        };
1265        Ok(Wrap(parsed))
1266    }
1267}
1268
/// Builds a CSV `QuoteStyle` from its Python string name.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    /// Accepts `"always"`, `"necessary"`, `"non_numeric"` and `"never"`; any
    /// other string is rejected with a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        let style = match v {
            "always" => Some(QuoteStyle::Always),
            "necessary" => Some(QuoteStyle::Necessary),
            "non_numeric" => Some(QuoteStyle::NonNumeric),
            "never" => Some(QuoteStyle::Never),
            _ => None,
        };
        style.map(Wrap).ok_or_else(|| {
            PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))
        })
    }
}
1286
/// Builds `CloudOptions` for `uri` from untyped string key/value pairs.
///
/// The scheme is derived from `uri` via `CloudScheme::from_uri`; a failure from
/// `CloudOptions::from_untyped_config` is converted through `PyPolarsErr`.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    // Hand the config over as a trait object, as the original did.
    let pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    let scheme = CloudScheme::from_uri(uri);
    let options =
        CloudOptions::from_untyped_config(scheme.as_ref(), pairs).map_err(PyPolarsErr::from)?;
    Ok(options)
}
1297
/// Builds a list `SetOperation` from its Python string name.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    /// Accepts `"union"`, `"difference"`, `"intersection"` and
    /// `"symmetric_difference"`; any other string is rejected with a
    /// `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        let lookup = match v {
            "union" => Some(SetOperation::Union),
            "difference" => Some(SetOperation::Difference),
            "intersection" => Some(SetOperation::Intersection),
            "symmetric_difference" => Some(SetOperation::SymmetricDifference),
            _ => None,
        };
        let Some(operation) = lookup else {
            return Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            )));
        };
        Ok(Wrap(operation))
    }
}
1315
1316// Conversion from ScanCastOptions class from the Python side.
1317impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1318    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1319        if ob.is_none() {
1320            // Initialize the default ScanCastOptions from Python.
1321            static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();
1322
1323            let out = DEFAULT.get_or_try_init(ob.py(), || {
1324                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1325                    .unwrap()
1326                    .getattr("ScanCastOptions")
1327                    .unwrap()
1328                    .call_method0("_default")
1329                    .unwrap();
1330
1331                let out = Self::extract_bound(&ob)?;
1332
1333                // The default policy should match ERROR_ON_MISMATCH (but this can change).
1334                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1335
1336                PyResult::Ok(out)
1337            })?;
1338
1339            return Ok(out.clone());
1340        }
1341
1342        let py = ob.py();
1343
1344        let integer_upcast = match &*ob
1345            .getattr(intern!(py, "integer_cast"))?
1346            .extract::<PyBackedStr>()?
1347        {
1348            "upcast" => true,
1349            "forbid" => false,
1350            v => {
1351                return Err(PyValueError::new_err(format!(
1352                    "unknown option for integer_cast: {v}"
1353                )));
1354            },
1355        };
1356
1357        let mut float_upcast = false;
1358        let mut float_downcast = false;
1359
1360        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1361
1362        parse_multiple_options("float_cast", float_cast_object, |v| {
1363            match v {
1364                "forbid" => {},
1365                "upcast" => float_upcast = true,
1366                "downcast" => float_downcast = true,
1367                v => {
1368                    return Err(PyValueError::new_err(format!(
1369                        "unknown option for float_cast: {v}"
1370                    )));
1371                },
1372            }
1373
1374            Ok(())
1375        })?;
1376
1377        let mut datetime_nanoseconds_downcast = false;
1378        let mut datetime_convert_timezone = false;
1379
1380        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1381
1382        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1383            match v {
1384                "forbid" => {},
1385                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1386                "convert-timezone" => datetime_convert_timezone = true,
1387                v => {
1388                    return Err(PyValueError::new_err(format!(
1389                        "unknown option for datetime_cast: {v}"
1390                    )));
1391                },
1392            };
1393
1394            Ok(())
1395        })?;
1396
1397        let missing_struct_fields = match &*ob
1398            .getattr(intern!(py, "missing_struct_fields"))?
1399            .extract::<PyBackedStr>()?
1400        {
1401            "insert" => MissingColumnsPolicy::Insert,
1402            "raise" => MissingColumnsPolicy::Raise,
1403            v => {
1404                return Err(PyValueError::new_err(format!(
1405                    "unknown option for missing_struct_fields: {v}"
1406                )));
1407            },
1408        };
1409
1410        let extra_struct_fields = match &*ob
1411            .getattr(intern!(py, "extra_struct_fields"))?
1412            .extract::<PyBackedStr>()?
1413        {
1414            "ignore" => ExtraColumnsPolicy::Ignore,
1415            "raise" => ExtraColumnsPolicy::Raise,
1416            v => {
1417                return Err(PyValueError::new_err(format!(
1418                    "unknown option for extra_struct_fields: {v}"
1419                )));
1420            },
1421        };
1422
1423        let categorical_to_string = match &*ob
1424            .getattr(intern!(py, "categorical_to_string"))?
1425            .extract::<PyBackedStr>()?
1426        {
1427            "allow" => true,
1428            "forbid" => false,
1429            v => {
1430                return Err(PyValueError::new_err(format!(
1431                    "unknown option for categorical_to_string: {v}"
1432                )));
1433            },
1434        };
1435
1436        return Ok(Wrap(CastColumnsPolicy {
1437            integer_upcast,
1438            float_upcast,
1439            float_downcast,
1440            datetime_nanoseconds_downcast,
1441            datetime_microseconds_downcast: false,
1442            datetime_convert_timezone,
1443            null_upcast: true,
1444            categorical_to_string,
1445            missing_struct_fields,
1446            extra_struct_fields,
1447        }));
1448
1449        fn parse_multiple_options(
1450            parameter_name: &'static str,
1451            py_object: Bound<'_, PyAny>,
1452            mut parser_func: impl FnMut(&str) -> PyResult<()>,
1453        ) -> PyResult<()> {
1454            if let Ok(v) = py_object.extract::<PyBackedStr>() {
1455                parser_func(&v)?;
1456            } else if let Ok(v) = py_object.try_iter() {
1457                for v in v {
1458                    parser_func(&v?.extract::<PyBackedStr>()?)?;
1459                }
1460            } else {
1461                return Err(PyValueError::new_err(format!(
1462                    "unknown type for {parameter_name}: {py_object}"
1463                )));
1464            }
1465
1466            Ok(())
1467        }
1468    }
1469}
1470
1471pub(crate) fn parse_fill_null_strategy(
1472    strategy: &str,
1473    limit: FillNullLimit,
1474) -> PyResult<FillNullStrategy> {
1475    let parsed = match strategy {
1476        "forward" => FillNullStrategy::Forward(limit),
1477        "backward" => FillNullStrategy::Backward(limit),
1478        "min" => FillNullStrategy::Min,
1479        "max" => FillNullStrategy::Max,
1480        "mean" => FillNullStrategy::Mean,
1481        "zero" => FillNullStrategy::Zero,
1482        "one" => FillNullStrategy::One,
1483        e => {
1484            return Err(PyValueError::new_err(format!(
1485                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1486            )));
1487        },
1488    };
1489    Ok(parsed)
1490}
1491
/// Maps a compression name and optional level to a `ParquetCompression`.
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`.
///
/// # Errors
///
/// Returns a `ValueError` when `compression` is not a known codec name, when
/// the level does not fit the codec's integer type, or when the codec's own
/// `try_new` rejects the level.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked narrowing: an `as u8` cast would wrap (e.g. 262 -> 6)
                    // and silently accept an out-of-range level.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "gzip `compression_level` out of range: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: reject negative levels explicitly instead
                    // of reinterpreting them as large unsigned values.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "brotli `compression_level` out of range: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1533
1534pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1535where
1536    I: IntoIterator<Item = S>,
1537    S: AsRef<str>,
1538{
1539    container
1540        .into_iter()
1541        .map(|s| PlSmallStr::from_str(s.as_ref()))
1542        .collect()
1543}
1544
/// Newtype over `CompatLevel` that can be extracted from a Python argument
/// (see its `FromPyObject` implementation in this file).
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);
1547
1548impl<'py> FromPyObject<'py> for PyCompatLevel {
1549    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1550        Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1551            if let Ok(compat_level) = CompatLevel::with_level(level) {
1552                compat_level
1553            } else {
1554                return Err(PyValueError::new_err("invalid compat level"));
1555            }
1556        } else if let Ok(future) = ob.extract::<bool>() {
1557            if future {
1558                CompatLevel::newest()
1559            } else {
1560                CompatLevel::oldest()
1561            }
1562        } else {
1563            return Err(PyTypeError::new_err(
1564                "'compat_level' argument accepts int or bool",
1565            ));
1566        }))
1567    }
1568}
1569
/// Builds a `UnicodeForm` from its Python string name.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    /// Accepts `"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"` (case-sensitive); any
    /// other string is rejected with a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let v: &str = &raw;
        Ok(Wrap(match v {
            "NFC" => UnicodeForm::NFC,
            "NFKC" => UnicodeForm::NFKC,
            "NFD" => UnicodeForm::NFD,
            "NFKD" => UnicodeForm::NFKD,
            _ => {
                let message =
                    format!("`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",);
                return Err(PyValueError::new_err(message));
            },
        }))
    }
}
1587
/// Builds optional parquet `KeyValueMetadata` from a Python object.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    /// `None` stays `None`. A list of string pairs becomes static metadata;
    /// any other object is kept as a Python object and handed to
    /// `KeyValueMetadata::from_py_function`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let converted = match Option::<Metadata>::extract_bound(ob)? {
            Some(Metadata::Static(pairs)) => Some(KeyValueMetadata::from_static(pairs)),
            Some(Metadata::Dynamic(callback)) => {
                Some(KeyValueMetadata::from_py_function(callback))
            },
            None => None,
        };
        Ok(Wrap(converted))
    }
}
1605
1606impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1607    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1608        let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1609
1610        let tz = tz.map(|x| x.0);
1611
1612        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1613    }
1614}
1615
1616impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1617    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1618        let parsed = match &*ob.extract::<PyBackedStr>()? {
1619            "upcast" => UpcastOrForbid::Upcast,
1620            "forbid" => UpcastOrForbid::Forbid,
1621            v => {
1622                return Err(PyValueError::new_err(format!(
1623                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1624                )));
1625            },
1626        };
1627        Ok(Wrap(parsed))
1628    }
1629}
1630
1631impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1632    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1633        let parsed = match &*ob.extract::<PyBackedStr>()? {
1634            "ignore" => ExtraColumnsPolicy::Ignore,
1635            "raise" => ExtraColumnsPolicy::Raise,
1636            v => {
1637                return Err(PyValueError::new_err(format!(
1638                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1639                )));
1640            },
1641        };
1642        Ok(Wrap(parsed))
1643    }
1644}
1645
1646impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1647    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1648        let parsed = match &*ob.extract::<PyBackedStr>()? {
1649            "insert" => MissingColumnsPolicy::Insert,
1650            "raise" => MissingColumnsPolicy::Raise,
1651            v => {
1652                return Err(PyValueError::new_err(format!(
1653                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1654                )));
1655            },
1656        };
1657        Ok(Wrap(parsed))
1658    }
1659}
1660
1661impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1662    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1663        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1664            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1665        }
1666
1667        let parsed = match &*ob.extract::<PyBackedStr>()? {
1668            "insert" => MissingColumnsPolicyOrExpr::Insert,
1669            "raise" => MissingColumnsPolicyOrExpr::Raise,
1670            v => {
1671                return Err(PyValueError::new_err(format!(
1672                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1673                )));
1674            },
1675        };
1676        Ok(Wrap(parsed))
1677    }
1678}
1679
1680impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1681    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1682        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1683
1684        Ok(Wrap(match &*column_mapping_type {
1685            "iceberg-column-mapping" => {
1686                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1687                ColumnMapping::Iceberg(Arc::new(
1688                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1689                ))
1690            },
1691
1692            v => {
1693                return Err(PyValueError::new_err(format!(
1694                    "unknown column mapping type: {v}"
1695                )));
1696            },
1697        }))
1698    }
1699}
1700
1701impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1702    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1703        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1704
1705        Ok(Wrap(match &*deletion_file_type {
1706            "iceberg-position-delete" => {
1707                let dict: Bound<'_, PyDict> = ob.extract()?;
1708
1709                let mut out = PlIndexMap::new();
1710
1711                for (k, v) in dict
1712                    .try_iter()?
1713                    .zip(dict.call_method0("values")?.try_iter()?)
1714                {
1715                    let k: usize = k?.extract()?;
1716                    let v: Bound<'_, PyAny> = v?.extract()?;
1717
1718                    let files = v
1719                        .try_iter()?
1720                        .map(|x| {
1721                            x.and_then(|x| {
1722                                let x: String = x.extract()?;
1723                                Ok(x)
1724                            })
1725                        })
1726                        .collect::<PyResult<Arc<[String]>>>()?;
1727
1728                    if !files.is_empty() {
1729                        out.insert(k, files);
1730                    }
1731                }
1732
1733                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1734            },
1735
1736            v => {
1737                return Err(PyValueError::new_err(format!(
1738                    "unknown deletion file type: {v}"
1739                )));
1740            },
1741        }))
1742    }
1743}
1744
1745impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
1746    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1747        let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1748
1749        Ok(Wrap(match &*default_values_type {
1750            "iceberg" => {
1751                let dict: Bound<'_, PyDict> = ob.extract()?;
1752
1753                let mut out = PlIndexMap::new();
1754
1755                for (k, v) in dict
1756                    .try_iter()?
1757                    .zip(dict.call_method0("values")?.try_iter()?)
1758                {
1759                    let k: u32 = k?.extract()?;
1760                    let v = v?;
1761
1762                    let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1763                        Ok(s.into_column())
1764                    } else {
1765                        let err_msg: String = v.extract()?;
1766                        Err(err_msg)
1767                    };
1768
1769                    out.insert(k, v);
1770                }
1771
1772                DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1773                    out,
1774                )))
1775            },
1776
1777            v => {
1778                return Err(PyValueError::new_err(format!(
1779                    "unknown deletion file type: {v}"
1780                )));
1781            },
1782        }))
1783    }
1784}
1785
1786impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1787    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1788        if let Ok(path) = ob.extract::<PyBackedStr>() {
1789            Ok(Wrap(PlPath::new(&path)))
1790        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1791            Ok(Wrap(PlPath::Local(path.into())))
1792        } else {
1793            Err(
1794                PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1795                    .into(),
1796            )
1797        }
1798    }
1799}
1800
1801impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1802    type Target = PyString;
1803    type Output = Bound<'py, Self::Target>;
1804    type Error = Infallible;
1805
1806    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1807        self.0.to_str().into_pyobject(py)
1808    }
1809}