Skip to main content

polars_python/conversion/
mod.rs

1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17use polars::prelude::ColumnMapping;
18use polars::prelude::default_values::{
19    DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
20};
21use polars::prelude::deletion::DeletionFilesList;
22use polars::series::ops::NullBehavior;
23use polars_buffer::Buffer;
24use polars_compute::decimal::dec128_verify_prec_scale;
25use polars_core::datatypes::extension::get_extension_type_or_generic;
26use polars_core::schema::iceberg::IcebergSchema;
27use polars_core::utils::arrow::array::Array;
28use polars_core::utils::materialize_dyn_int;
29use polars_lazy::prelude::*;
30#[cfg(feature = "parquet")]
31use polars_parquet::write::StatisticsOptions;
32use polars_plan::dsl::ScanSources;
33use polars_utils::compression::{BrotliLevel, GzipLevel, ZstdLevel};
34use polars_utils::pl_str::PlSmallStr;
35use polars_utils::total_ord::{TotalEq, TotalHash};
36use pyo3::basic::CompareOp;
37use pyo3::exceptions::{PyTypeError, PyValueError};
38use pyo3::intern;
39use pyo3::prelude::*;
40use pyo3::pybacked::PyBackedStr;
41use pyo3::sync::PyOnceLock;
42use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
43
44use crate::error::PyPolarsErr;
45use crate::expr::PyExpr;
46use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
47#[cfg(feature = "object")]
48use crate::object::OBJECT_NAME;
49use crate::prelude::*;
50use crate::py_modules::{pl_series, polars};
51use crate::series::{PySeries, import_schema_pycapsule};
52use crate::utils::to_py_err;
53use crate::{PyDataFrame, PyLazyFrame};
54
/// Marker for types that may be reinterpreted in place as their [`Transparent::Target`].
///
/// [`reinterpret_vec`] uses this bound to turn a `Vec<T>` into a `Vec<T::Target>` without
/// copying the elements.
///
/// # Safety
/// Should only be implemented for transparent types
pub(crate) unsafe trait Transparent {
    /// The type that `Self` is reinterpreted as.
    type Target;
}
60
// NOTE(review): `get_series` in this file reads `.series.into_inner()`, so the field does not
// look like a bare `Series`; confirm that `PySeries` really shares the layout of `Series`.
// (`reinterpret_vec` asserts equal size and alignment at runtime.)
unsafe impl Transparent for PySeries {
    type Target = Series;
}
64
// SAFETY: `Wrap<T>` is declared `#[repr(transparent)]` over its single field of type `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
68
// NOTE(review): assumes `Option<T>` and `Option<T::Target>` share a layout whenever `T` is
// transparent over `T::Target` — confirm.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
72
73pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
74    assert_eq!(size_of::<T>(), size_of::<T::Target>());
75    assert_eq!(align_of::<T>(), align_of::<T::Target>());
76    let len = input.len();
77    let cap = input.capacity();
78    let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
79    let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
80    let ptr: *mut T::Target = vec_ptr as *mut T::Target;
81    unsafe { Vec::from_raw_parts(ptr, len, cap) }
82}
83
84pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
85    reinterpret_vec(buf)
86}
87
/// Newtype wrapper around `T`.
///
/// This file implements the pyo3 conversion traits (`FromPyObject` / `IntoPyObject`) on
/// `Wrap<...>` for a number of types. `#[repr(transparent)]` gives it the layout of `T`.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
91
92impl<T> Clone for Wrap<T>
93where
94    T: Clone,
95{
96    fn clone(&self) -> Self {
97        Wrap(self.0.clone())
98    }
99}
100impl<T> From<T> for Wrap<T> {
101    fn from(t: T) -> Self {
102        Wrap(t)
103    }
104}
105
106// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
107pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
108    let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
109    Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
110}
111
112pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
113    let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
114    Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
115}
116
117pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
118    let s = obj.getattr(intern!(obj.py(), "_s"))?;
119    Ok(s.extract::<PySeries>()?.series.into_inner())
120}
121
122pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
123    let series = pl_series(py).bind(py);
124    let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
125    constructor.call1((s,))
126}
127
128impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<PlSmallStr> {
129    type Error = PyErr;
130
131    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
132        Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
133    }
134}
135
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullValues> {
    type Error = PyErr;

    /// Tries, in order, to read `ob` as a single string, a sequence of strings, or a sequence
    /// of `(str, str)` pairs, and builds the corresponding `NullValues` variant.
    ///
    /// # Errors
    /// Returns an error when none of the three shapes can be extracted.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        // A lone string.
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        // A sequence of strings.
        if let Ok(shared) = ob.extract::<Vec<PyBackedStr>>() {
            return Ok(Wrap(NullValues::AllColumns(
                shared.into_iter().map(|value| (&*value).into()).collect(),
            )));
        }

        // A sequence of string pairs.
        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            return Ok(Wrap(NullValues::Named(
                pairs
                    .into_iter()
                    .map(|(first, second)| ((&*first).into(), (&*second).into()))
                    .collect(),
            )));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
161
162fn struct_dict<'a, 'py>(
163    py: Python<'py>,
164    vals: impl Iterator<Item = AnyValue<'a>>,
165    flds: &[Field],
166) -> PyResult<Bound<'py, PyDict>> {
167    let dict = PyDict::new(py);
168    flds.iter().zip(vals).try_for_each(|(fld, val)| {
169        dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
170    })?;
171    Ok(dict)
172}
173
174impl<'py> IntoPyObject<'py> for Wrap<Series> {
175    type Target = PyAny;
176    type Output = Bound<'py, Self::Target>;
177    type Error = PyErr;
178
179    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
180        to_series(py, PySeries::new(self.0))
181    }
182}
183
184impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
185    type Target = PyAny;
186    type Output = Bound<'py, Self::Target>;
187    type Error = PyErr;
188
189    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
190        let pl = polars(py).bind(py);
191
192        match &self.0 {
193            DataType::Int8 => {
194                let class = pl.getattr(intern!(py, "Int8"))?;
195                class.call0()
196            },
197            DataType::Int16 => {
198                let class = pl.getattr(intern!(py, "Int16"))?;
199                class.call0()
200            },
201            DataType::Int32 => {
202                let class = pl.getattr(intern!(py, "Int32"))?;
203                class.call0()
204            },
205            DataType::Int64 => {
206                let class = pl.getattr(intern!(py, "Int64"))?;
207                class.call0()
208            },
209            DataType::UInt8 => {
210                let class = pl.getattr(intern!(py, "UInt8"))?;
211                class.call0()
212            },
213            DataType::UInt16 => {
214                let class = pl.getattr(intern!(py, "UInt16"))?;
215                class.call0()
216            },
217            DataType::UInt32 => {
218                let class = pl.getattr(intern!(py, "UInt32"))?;
219                class.call0()
220            },
221            DataType::UInt64 => {
222                let class = pl.getattr(intern!(py, "UInt64"))?;
223                class.call0()
224            },
225            DataType::UInt128 => {
226                let class = pl.getattr(intern!(py, "UInt128"))?;
227                class.call0()
228            },
229            DataType::Int128 => {
230                let class = pl.getattr(intern!(py, "Int128"))?;
231                class.call0()
232            },
233            DataType::Float16 => {
234                let class = pl.getattr(intern!(py, "Float16"))?;
235                class.call0()
236            },
237            DataType::Float32 => {
238                let class = pl.getattr(intern!(py, "Float32"))?;
239                class.call0()
240            },
241            DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
242                let class = pl.getattr(intern!(py, "Float64"))?;
243                class.call0()
244            },
245            DataType::Decimal(precision, scale) => {
246                let class = pl.getattr(intern!(py, "Decimal"))?;
247                let args = (*precision, *scale);
248                class.call1(args)
249            },
250            DataType::Boolean => {
251                let class = pl.getattr(intern!(py, "Boolean"))?;
252                class.call0()
253            },
254            DataType::String | DataType::Unknown(UnknownKind::Str) => {
255                let class = pl.getattr(intern!(py, "String"))?;
256                class.call0()
257            },
258            DataType::Binary => {
259                let class = pl.getattr(intern!(py, "Binary"))?;
260                class.call0()
261            },
262            DataType::Array(inner, size) => {
263                let class = pl.getattr(intern!(py, "Array"))?;
264                let inner = Wrap(*inner.clone());
265                let args = (&inner, *size);
266                class.call1(args)
267            },
268            DataType::List(inner) => {
269                let class = pl.getattr(intern!(py, "List"))?;
270                let inner = Wrap(*inner.clone());
271                class.call1((&inner,))
272            },
273            DataType::Date => {
274                let class = pl.getattr(intern!(py, "Date"))?;
275                class.call0()
276            },
277            DataType::Datetime(tu, tz) => {
278                let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
279                datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
280            },
281            DataType::Duration(tu) => {
282                let duration_class = pl.getattr(intern!(py, "Duration"))?;
283                duration_class.call1((tu.to_ascii(),))
284            },
285            #[cfg(feature = "object")]
286            DataType::Object(_) => {
287                let class = pl.getattr(intern!(py, "Object"))?;
288                class.call0()
289            },
290            DataType::Categorical(cats, _) => {
291                let categories_class = pl.getattr(intern!(py, "Categories"))?;
292                let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
293                let categories = categories_class
294                    .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
295                let kwargs = [("categories", categories)];
296                categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
297            },
298            DataType::Enum(_, mapping) => {
299                let categories = unsafe {
300                    StringChunked::from_chunks(
301                        PlSmallStr::from_static("category"),
302                        vec![mapping.to_arrow(true)],
303                    )
304                };
305                let class = pl.getattr(intern!(py, "Enum"))?;
306                let series = to_series(py, categories.into_series().into())?;
307                class.call1((series,))
308            },
309            DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
310            DataType::Struct(fields) => {
311                let field_class = pl.getattr(intern!(py, "Field"))?;
312                let iter = fields.iter().map(|fld| {
313                    let name = fld.name().as_str();
314                    let dtype = Wrap(fld.dtype().clone());
315                    field_class.call1((name, &dtype)).unwrap()
316                });
317                let fields = PyList::new(py, iter)?;
318                let struct_class = pl.getattr(intern!(py, "Struct"))?;
319                struct_class.call1((fields,))
320            },
321            DataType::Null => {
322                let class = pl.getattr(intern!(py, "Null"))?;
323                class.call0()
324            },
325            DataType::Extension(typ, storage) => {
326                let py_storage = Wrap((**storage).clone()).into_pyobject(py)?;
327                let py_typ = pl
328                    .getattr(intern!(py, "get_extension_type"))?
329                    .call1((typ.name(),))?;
330                let class = if py_typ.is_none()
331                    || py_typ.str().map(|s| s == "storage").ok() == Some(true)
332                {
333                    pl.getattr(intern!(py, "Extension"))?
334                } else {
335                    py_typ
336                };
337                let from_params = class.getattr(intern!(py, "ext_from_params"))?;
338                from_params.call1((typ.name(), py_storage, typ.serialize_metadata()))
339            },
340            DataType::Unknown(UnknownKind::Int(v)) => {
341                Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
342            },
343            DataType::Unknown(_) => {
344                let class = pl.getattr(intern!(py, "Unknown"))?;
345                class.call0()
346            },
347            DataType::BinaryOffset => {
348                unimplemented!()
349            },
350        }
351    }
352}
353
354impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Field> {
355    type Error = PyErr;
356
357    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
358        let py = ob.py();
359        let name = ob
360            .getattr(intern!(py, "name"))?
361            .str()?
362            .extract::<PyBackedStr>()?;
363        let dtype = ob
364            .getattr(intern!(py, "dtype"))?
365            .extract::<Wrap<DataType>>()?;
366        Ok(Wrap(Field::new((&*name).into(), dtype.0)))
367    }
368}
369
370impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DataType> {
371    type Error = PyErr;
372
373    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
374        let py = ob.py();
375        let type_name = ob.get_type().qualname()?.to_string();
376
377        let dtype = match &*type_name {
378            "DataTypeClass" => {
379                // just the class, not an object
380                let name = ob
381                    .getattr(intern!(py, "__name__"))?
382                    .str()?
383                    .extract::<PyBackedStr>()?;
384                match &*name {
385                    "Int8" => DataType::Int8,
386                    "Int16" => DataType::Int16,
387                    "Int32" => DataType::Int32,
388                    "Int64" => DataType::Int64,
389                    "Int128" => DataType::Int128,
390                    "UInt8" => DataType::UInt8,
391                    "UInt16" => DataType::UInt16,
392                    "UInt32" => DataType::UInt32,
393                    "UInt64" => DataType::UInt64,
394                    "UInt128" => DataType::UInt128,
395                    "Float16" => DataType::Float16,
396                    "Float32" => DataType::Float32,
397                    "Float64" => DataType::Float64,
398                    "Boolean" => DataType::Boolean,
399                    "String" => DataType::String,
400                    "Binary" => DataType::Binary,
401                    "Categorical" => DataType::from_categories(Categories::global()),
402                    "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
403                    "Date" => DataType::Date,
404                    "Time" => DataType::Time,
405                    "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
406                    "Duration" => DataType::Duration(TimeUnit::Microseconds),
407                    "List" => DataType::List(Box::new(DataType::Null)),
408                    "Array" => DataType::Array(Box::new(DataType::Null), 0),
409                    "Struct" => DataType::Struct(vec![]),
410                    "Null" => DataType::Null,
411                    #[cfg(feature = "object")]
412                    "Object" => DataType::Object(OBJECT_NAME),
413                    "Unknown" => DataType::Unknown(Default::default()),
414                    "Decimal" => {
415                        return Err(PyTypeError::new_err(
416                            "Decimal without precision/scale set is not a valid Polars datatype",
417                        ));
418                    },
419                    dt => {
420                        return Err(PyTypeError::new_err(format!(
421                            "'{dt}' is not a Polars data type",
422                        )));
423                    },
424                }
425            },
426            "Int8" => DataType::Int8,
427            "Int16" => DataType::Int16,
428            "Int32" => DataType::Int32,
429            "Int64" => DataType::Int64,
430            "Int128" => DataType::Int128,
431            "UInt8" => DataType::UInt8,
432            "UInt16" => DataType::UInt16,
433            "UInt32" => DataType::UInt32,
434            "UInt64" => DataType::UInt64,
435            "UInt128" => DataType::UInt128,
436            "Float16" => DataType::Float16,
437            "Float32" => DataType::Float32,
438            "Float64" => DataType::Float64,
439            "Boolean" => DataType::Boolean,
440            "String" => DataType::String,
441            "Binary" => DataType::Binary,
442            "Categorical" => {
443                let categories = ob.getattr(intern!(py, "categories")).unwrap();
444                let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
445                let py_categories = py_categories.extract::<PyCategories>()?;
446                DataType::from_categories(py_categories.categories().clone())
447            },
448            "Enum" => {
449                let categories = ob.getattr(intern!(py, "categories")).unwrap();
450                let s = get_series(&categories.as_borrowed())?;
451                let ca = s.str().map_err(PyPolarsErr::from)?;
452                let categories = ca.downcast_iter().next().unwrap().clone();
453                assert!(!categories.has_nulls());
454                DataType::from_frozen_categories(
455                    FrozenCategories::new(categories.values_iter()).unwrap(),
456                )
457            },
458            "Date" => DataType::Date,
459            "Time" => DataType::Time,
460            "Datetime" => {
461                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
462                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
463                let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
464                let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
465                DataType::Datetime(
466                    time_unit,
467                    TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
468                )
469            },
470            "Duration" => {
471                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
472                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
473                DataType::Duration(time_unit)
474            },
475            "Decimal" => {
476                let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
477                let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
478                dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;
479                DataType::Decimal(precision, scale)
480            },
481            "List" => {
482                let inner = ob.getattr(intern!(py, "inner")).unwrap();
483                let inner = inner.extract::<Wrap<DataType>>()?;
484                DataType::List(Box::new(inner.0))
485            },
486            "Array" => {
487                let inner = ob.getattr(intern!(py, "inner")).unwrap();
488                let size = ob.getattr(intern!(py, "size")).unwrap();
489                let inner = inner.extract::<Wrap<DataType>>()?;
490                let size = size.extract::<usize>()?;
491                DataType::Array(Box::new(inner.0), size)
492            },
493            "Struct" => {
494                let fields = ob.getattr(intern!(py, "fields"))?;
495                let fields = fields
496                    .extract::<Vec<Wrap<Field>>>()?
497                    .into_iter()
498                    .map(|f| f.0)
499                    .collect::<Vec<Field>>();
500                DataType::Struct(fields)
501            },
502            "Null" => DataType::Null,
503            #[cfg(feature = "object")]
504            "Object" => DataType::Object(OBJECT_NAME),
505            "Unknown" => DataType::Unknown(Default::default()),
506            dt => {
507                let base_ext = polars(py)
508                    .getattr(py, intern!(py, "BaseExtension"))
509                    .unwrap();
510                if ob.is_instance(base_ext.bind(py))? {
511                    let ext_name_f = ob.getattr(intern!(py, "ext_name"))?;
512                    let ext_metadata_f = ob.getattr(intern!(py, "ext_metadata"))?;
513                    let ext_storage_f = ob.getattr(intern!(py, "ext_storage"))?;
514                    let name: String = ext_name_f.call0()?.extract()?;
515                    let metadata: Option<String> = ext_metadata_f.call0()?.extract()?;
516                    let storage: Wrap<DataType> = ext_storage_f.call0()?.extract()?;
517                    let ext_typ =
518                        get_extension_type_or_generic(&name, &storage.0, metadata.as_deref());
519                    return Ok(Wrap(DataType::Extension(ext_typ, Box::new(storage.0))));
520                }
521
522                return Err(PyTypeError::new_err(format!(
523                    "'{dt}' is not a Polars data type",
524                )));
525            },
526        };
527        Ok(Wrap(dtype))
528    }
529}
530
531impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
532    type Target = PyString;
533    type Output = Bound<'py, Self::Target>;
534    type Error = Infallible;
535
536    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
537        self.0.to_ascii().into_pyobject(py)
538    }
539}
540
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StatisticsOptions> {
    type Error = PyErr;

    /// Reads a Python `dict` of `str -> bool` into `StatisticsOptions`, starting from
    /// `StatisticsOptions::empty()`.
    ///
    /// Recognised keys are `"min"`, `"max"`, `"distinct_count"` and `"null_count"`.
    ///
    /// # Errors
    /// Fails if `ob` is not a dict, a key is not a string, a value is not a bool, or a key is
    /// not one of the recognised names (`PyTypeError`).
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();
        let dict = ob.cast::<PyDict>()?;

        for (key, val) in dict.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag named by the key, then store the value into it.
            let flag = match key.as_ref() {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
569
570impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Row<'static>> {
571    type Error = PyErr;
572
573    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
574        let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
575        let vals = reinterpret_vec(vals);
576        Ok(Wrap(Row(vals)))
577    }
578}
579
580impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Schema> {
581    type Error = PyErr;
582
583    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
584        let dict = ob.cast::<PyDict>()?;
585
586        Ok(Wrap(
587            dict.iter()
588                .map(|(key, val)| {
589                    let key = key.extract::<PyBackedStr>()?;
590                    let val = val.extract::<Wrap<DataType>>()?;
591
592                    Ok(Field::new((&*key).into(), val.0))
593                })
594                .collect::<PyResult<Schema>>()?,
595        ))
596    }
597}
598
599impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ArrowSchema> {
600    type Error = PyErr;
601
602    fn extract(schema_object: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
603        let py = schema_object.py();
604
605        let schema_capsule = schema_object
606            .getattr(intern!(py, "__arrow_c_schema__"))?
607            .call0()?;
608
609        let field = import_schema_pycapsule(&schema_capsule.extract()?)?;
610
611        let ArrowDataType::Struct(fields) = field.dtype else {
612            return Err(PyValueError::new_err(format!(
613                "__arrow_c_schema__ of object did not return struct dtype: \
614                object: {:?}, dtype: {:?}",
615                schema_object, &field.dtype
616            )));
617        };
618
619        let mut schema = ArrowSchema::from_iter_check_duplicates(fields).map_err(to_py_err)?;
620
621        if let Some(md) = field.metadata {
622            *schema.metadata_mut() = Arc::unwrap_or_clone(md);
623        }
624
625        Ok(Wrap(schema))
626    }
627}
628
629impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ScanSources> {
630    type Error = PyErr;
631
632    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
633        let list = ob.cast::<PyList>()?.to_owned();
634
635        if list.is_empty() {
636            return Ok(Wrap(ScanSources::default()));
637        }
638
639        enum MutableSources {
640            Paths(Vec<PlRefPath>),
641            Files(Vec<File>),
642            Buffers(Vec<Buffer<u8>>),
643        }
644
645        let num_items = list.len();
646        let mut iter = list
647            .into_iter()
648            .map(|val| get_python_scan_source_input(val.unbind(), false));
649
650        let Some(first) = iter.next() else {
651            return Ok(Wrap(ScanSources::default()));
652        };
653
654        let mut sources = match first? {
655            PythonScanSourceInput::Path(path) => {
656                let mut sources = Vec::with_capacity(num_items);
657                sources.push(path);
658                MutableSources::Paths(sources)
659            },
660            PythonScanSourceInput::File(file) => {
661                let mut sources = Vec::with_capacity(num_items);
662                sources.push(file.into());
663                MutableSources::Files(sources)
664            },
665            PythonScanSourceInput::Buffer(buffer) => {
666                let mut sources = Vec::with_capacity(num_items);
667                sources.push(buffer);
668                MutableSources::Buffers(sources)
669            },
670        };
671
672        for source in iter {
673            match (&mut sources, source?) {
674                (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
675                (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
676                (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
677                _ => {
678                    return Err(PyTypeError::new_err(
679                        "Cannot combine in-memory bytes, paths and files for scan sources",
680                    ));
681                },
682            }
683        }
684
685        Ok(Wrap(match sources {
686            MutableSources::Paths(i) => ScanSources::Paths(i.into()),
687            MutableSources::Files(i) => ScanSources::Files(i.into()),
688            MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
689        }))
690    }
691}
692
693impl<'py> IntoPyObject<'py> for Wrap<Schema> {
694    type Target = PyDict;
695    type Output = Bound<'py, Self::Target>;
696    type Error = PyErr;
697
698    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
699        let dict = PyDict::new(py);
700        self.0
701            .iter()
702            .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
703        Ok(dict)
704    }
705}
706
/// Owned handle to an arbitrary Python object.
///
/// The trait impls in this file (`Clone`, `Hash`, `PartialEq`, `Default`) attach to the
/// Python interpreter via `Python::attach` to operate on the wrapped object.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The wrapped Python object.
    pub inner: Py<PyAny>,
}
712
713impl Clone for ObjectValue {
714    fn clone(&self) -> Self {
715        Python::attach(|py| Self {
716            inner: self.inner.clone_ref(py),
717        })
718    }
719}
720
721impl Hash for ObjectValue {
722    fn hash<H: Hasher>(&self, state: &mut H) {
723        let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));
724        state.write_isize(h)
725    }
726}
727
// Marker impl; the comparison itself is defined by the `PartialEq` impl for `ObjectValue`.
impl Eq for ObjectValue {}
729
730impl PartialEq for ObjectValue {
731    fn eq(&self, other: &Self) -> bool {
732        Python::attach(|py| {
733            match self
734                .inner
735                .bind(py)
736                .rich_compare(other.inner.bind(py), CompareOp::Eq)
737            {
738                Ok(result) => result.is_truthy().unwrap(),
739                Err(_) => false,
740            }
741        })
742    }
743}
744
745impl TotalEq for ObjectValue {
746    fn tot_eq(&self, other: &Self) -> bool {
747        self == other
748    }
749}
750
751impl TotalHash for ObjectValue {
752    fn tot_hash<H>(&self, state: &mut H)
753    where
754        H: Hasher,
755    {
756        self.hash(state);
757    }
758}
759
760impl Display for ObjectValue {
761    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
762        write!(f, "{}", self.inner)
763    }
764}
765
/// Implements `PolarsObject` for `ObjectValue`, reporting the type name `"object"`.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    fn type_name() -> &'static str {
        "object"
    }
}
772
773impl From<Py<PyAny>> for ObjectValue {
774    fn from(p: Py<PyAny>) -> Self {
775        Self { inner: p }
776    }
777}
778
779impl<'a, 'py> FromPyObject<'a, 'py> for ObjectValue {
780    type Error = PyErr;
781
782    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
783        Ok(ObjectValue {
784            inner: ob.to_owned().unbind(),
785        })
786    }
787}
788
/// Reinterprets a `&dyn PolarsObjectSafe` as a `&ObjectValue` without any runtime type check.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // The cast chain discards the vtable half of the trait-object pointer and keeps only
        // the data pointer.
        // SAFETY: sound only if the concrete type behind `val` is `ObjectValue`, which the
        // caller must guarantee (see the note on this impl).
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
798
799impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
800    type Target = PyAny;
801    type Output = Borrowed<'a, 'py, Self::Target>;
802    type Error = std::convert::Infallible;
803
804    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
805        Ok(self.inner.bind_borrowed(py))
806    }
807}
808
809impl Default for ObjectValue {
810    fn default() -> Self {
811        Python::attach(|py| ObjectValue { inner: py.None() })
812    }
813}
814
815impl<'a, 'py, T> FromPyObject<'a, 'py> for Wrap<Vec<T>>
816where
817    T: FromPyObjectOwned<'py>,
818{
819    type Error = PyErr;
820
821    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
822        let seq = ob
823            .cast::<PySequence>()
824            .map_err(<PyErr as From<pyo3::CastError>>::from)?;
825        let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
826        for item in seq.try_iter()? {
827            v.push(item?.extract::<T>().map_err(Into::into)?);
828        }
829        Ok(Wrap(v))
830    }
831}
832
#[cfg(feature = "asof_join")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<AsofStrategy> {
    type Error = PyErr;

    /// Parses `"backward"`, `"forward"` or `"nearest"` into an `AsofStrategy`.
    ///
    /// # Errors
    /// `PyValueError` for any other string; extraction errors if `ob` is not a string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
851
852impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<InterpolationMethod> {
853    type Error = PyErr;
854
855    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
856        let parsed = match &*(ob.extract::<PyBackedStr>()?) {
857            "linear" => InterpolationMethod::Linear,
858            "nearest" => InterpolationMethod::Nearest,
859            v => {
860                return Err(PyValueError::new_err(format!(
861                    "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
862                )));
863            },
864        };
865        Ok(Wrap(parsed))
866    }
867}
868
#[cfg(feature = "avro")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<AvroCompression>> {
    type Error = PyErr;

    /// Maps the Python string for avro `compression` onto an optional
    /// `AvroCompression`; `'uncompressed'` becomes `None`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized compression name.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
887
888impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StartBy> {
889    type Error = PyErr;
890
891    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
892        let parsed = match &*ob.extract::<PyBackedStr>()? {
893            "window" => StartBy::WindowBound,
894            "datapoint" => StartBy::DataPoint,
895            "monday" => StartBy::Monday,
896            "tuesday" => StartBy::Tuesday,
897            "wednesday" => StartBy::Wednesday,
898            "thursday" => StartBy::Thursday,
899            "friday" => StartBy::Friday,
900            "saturday" => StartBy::Saturday,
901            "sunday" => StartBy::Sunday,
902            v => {
903                return Err(PyValueError::new_err(format!(
904                    "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
905                )));
906            },
907        };
908        Ok(Wrap(parsed))
909    }
910}
911
912impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedWindow> {
913    type Error = PyErr;
914
915    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
916        let parsed = match &*ob.extract::<PyBackedStr>()? {
917            "left" => ClosedWindow::Left,
918            "right" => ClosedWindow::Right,
919            "both" => ClosedWindow::Both,
920            "none" => ClosedWindow::None,
921            v => {
922                return Err(PyValueError::new_err(format!(
923                    "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
924                )));
925            },
926        };
927        Ok(Wrap(parsed))
928    }
929}
930
931impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RoundMode> {
932    type Error = PyErr;
933
934    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
935        let parsed = match &*ob.extract::<PyBackedStr>()? {
936            "half_to_even" => RoundMode::HalfToEven,
937            "half_away_from_zero" => RoundMode::HalfAwayFromZero,
938            v => {
939                return Err(PyValueError::new_err(format!(
940                    "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
941                )));
942            },
943        };
944        Ok(Wrap(parsed))
945    }
946}
947
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CsvEncoding> {
    type Error = PyErr;

    /// Maps the Python string for the csv `encoding` onto `CsvEncoding`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized encoding.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "utf8" => Ok(Wrap(CsvEncoding::Utf8)),
            "utf8-lossy" => Ok(Wrap(CsvEncoding::LossyUtf8)),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        }
    }
}
965
#[cfg(feature = "ipc")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<IpcCompression>> {
    type Error = PyErr;

    /// Maps the Python string for ipc `compression` onto an optional
    /// `IpcCompression`; `'uncompressed'` becomes `None` and `'zstd'` uses the
    /// default zstd settings.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized compression name.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "lz4" => Ok(Wrap(Some(IpcCompression::LZ4))),
            "zstd" => Ok(Wrap(Some(IpcCompression::ZSTD(Default::default())))),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        }
    }
}
984
985impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinType> {
986    type Error = PyErr;
987
988    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
989        let parsed = match &*ob.extract::<PyBackedStr>()? {
990            "inner" => JoinType::Inner,
991            "left" => JoinType::Left,
992            "right" => JoinType::Right,
993            "full" => JoinType::Full,
994            "semi" => JoinType::Semi,
995            "anti" => JoinType::Anti,
996            #[cfg(feature = "cross_join")]
997            "cross" => JoinType::Cross,
998            v => {
999                return Err(PyValueError::new_err(format!(
1000                    "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
1001                )));
1002            },
1003        };
1004        Ok(Wrap(parsed))
1005    }
1006}
1007
1008impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Label> {
1009    type Error = PyErr;
1010
1011    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1012        let parsed = match &*ob.extract::<PyBackedStr>()? {
1013            "left" => Label::Left,
1014            "right" => Label::Right,
1015            "datapoint" => Label::DataPoint,
1016            v => {
1017                return Err(PyValueError::new_err(format!(
1018                    "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
1019                )));
1020            },
1021        };
1022        Ok(Wrap(parsed))
1023    }
1024}
1025
1026impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ListToStructWidthStrategy> {
1027    type Error = PyErr;
1028
1029    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1030        let parsed = match &*ob.extract::<PyBackedStr>()? {
1031            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
1032            "max_width" => ListToStructWidthStrategy::MaxWidth,
1033            v => {
1034                return Err(PyValueError::new_err(format!(
1035                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1036                )));
1037            },
1038        };
1039        Ok(Wrap(parsed))
1040    }
1041}
1042
1043impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NonExistent> {
1044    type Error = PyErr;
1045
1046    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1047        let parsed = match &*ob.extract::<PyBackedStr>()? {
1048            "null" => NonExistent::Null,
1049            "raise" => NonExistent::Raise,
1050            v => {
1051                return Err(PyValueError::new_err(format!(
1052                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1053                )));
1054            },
1055        };
1056        Ok(Wrap(parsed))
1057    }
1058}
1059
1060impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullBehavior> {
1061    type Error = PyErr;
1062
1063    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1064        let parsed = match &*ob.extract::<PyBackedStr>()? {
1065            "drop" => NullBehavior::Drop,
1066            "ignore" => NullBehavior::Ignore,
1067            v => {
1068                return Err(PyValueError::new_err(format!(
1069                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1070                )));
1071            },
1072        };
1073        Ok(Wrap(parsed))
1074    }
1075}
1076
1077impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullStrategy> {
1078    type Error = PyErr;
1079
1080    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1081        let parsed = match &*ob.extract::<PyBackedStr>()? {
1082            "ignore" => NullStrategy::Ignore,
1083            "propagate" => NullStrategy::Propagate,
1084            v => {
1085                return Err(PyValueError::new_err(format!(
1086                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1087                )));
1088            },
1089        };
1090        Ok(Wrap(parsed))
1091    }
1092}
1093
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ParallelStrategy> {
    type Error = PyErr;

    /// Maps the Python string for `parallel` onto `ParallelStrategy`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized value.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "auto" => Ok(Wrap(ParallelStrategy::Auto)),
            "columns" => Ok(Wrap(ParallelStrategy::Columns)),
            "row_groups" => Ok(Wrap(ParallelStrategy::RowGroups)),
            "prefiltered" => Ok(Wrap(ParallelStrategy::Prefiltered)),
            "none" => Ok(Wrap(ParallelStrategy::None)),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        }
    }
}
1114
1115impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<IndexOrder> {
1116    type Error = PyErr;
1117
1118    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1119        let parsed = match &*ob.extract::<PyBackedStr>()? {
1120            "fortran" => IndexOrder::Fortran,
1121            "c" => IndexOrder::C,
1122            v => {
1123                return Err(PyValueError::new_err(format!(
1124                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
1125                )));
1126            },
1127        };
1128        Ok(Wrap(parsed))
1129    }
1130}
1131
1132impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuantileMethod> {
1133    type Error = PyErr;
1134
1135    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1136        let parsed = match &*ob.extract::<PyBackedStr>()? {
1137            "lower" => QuantileMethod::Lower,
1138            "higher" => QuantileMethod::Higher,
1139            "nearest" => QuantileMethod::Nearest,
1140            "linear" => QuantileMethod::Linear,
1141            "midpoint" => QuantileMethod::Midpoint,
1142            "equiprobable" => QuantileMethod::Equiprobable,
1143            v => {
1144                return Err(PyValueError::new_err(format!(
1145                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1146                )));
1147            },
1148        };
1149        Ok(Wrap(parsed))
1150    }
1151}
1152
1153impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RankMethod> {
1154    type Error = PyErr;
1155
1156    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1157        let parsed = match &*ob.extract::<PyBackedStr>()? {
1158            "min" => RankMethod::Min,
1159            "max" => RankMethod::Max,
1160            "average" => RankMethod::Average,
1161            "dense" => RankMethod::Dense,
1162            "ordinal" => RankMethod::Ordinal,
1163            "random" => RankMethod::Random,
1164            v => {
1165                return Err(PyValueError::new_err(format!(
1166                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1167                )));
1168            },
1169        };
1170        Ok(Wrap(parsed))
1171    }
1172}
1173
1174impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RollingRankMethod> {
1175    type Error = PyErr;
1176
1177    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1178        let parsed = match &*ob.extract::<PyBackedStr>()? {
1179            "min" => RollingRankMethod::Min,
1180            "max" => RollingRankMethod::Max,
1181            "average" => RollingRankMethod::Average,
1182            "dense" => RollingRankMethod::Dense,
1183            "random" => RollingRankMethod::Random,
1184            v => {
1185                return Err(PyValueError::new_err(format!(
1186                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1187                )));
1188            },
1189        };
1190        Ok(Wrap(parsed))
1191    }
1192}
1193
1194impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Roll> {
1195    type Error = PyErr;
1196
1197    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1198        let parsed = match &*ob.extract::<PyBackedStr>()? {
1199            "raise" => Roll::Raise,
1200            "forward" => Roll::Forward,
1201            "backward" => Roll::Backward,
1202            v => {
1203                return Err(PyValueError::new_err(format!(
1204                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1205                )));
1206            },
1207        };
1208        Ok(Wrap(parsed))
1209    }
1210}
1211
1212impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TimeUnit> {
1213    type Error = PyErr;
1214
1215    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1216        let parsed = match &*ob.extract::<PyBackedStr>()? {
1217            "ns" => TimeUnit::Nanoseconds,
1218            "us" => TimeUnit::Microseconds,
1219            "ms" => TimeUnit::Milliseconds,
1220            v => {
1221                return Err(PyValueError::new_err(format!(
1222                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1223                )));
1224            },
1225        };
1226        Ok(Wrap(parsed))
1227    }
1228}
1229
1230impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UniqueKeepStrategy> {
1231    type Error = PyErr;
1232
1233    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1234        let parsed = match &*ob.extract::<PyBackedStr>()? {
1235            "first" => UniqueKeepStrategy::First,
1236            "last" => UniqueKeepStrategy::Last,
1237            "none" => UniqueKeepStrategy::None,
1238            "any" => UniqueKeepStrategy::Any,
1239            v => {
1240                return Err(PyValueError::new_err(format!(
1241                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1242                )));
1243            },
1244        };
1245        Ok(Wrap(parsed))
1246    }
1247}
1248
#[cfg(feature = "search_sorted")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SearchSortedSide> {
    type Error = PyErr;

    /// Maps the Python string for the search-sorted `side` onto
    /// `SearchSortedSide`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized side.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1267
1268impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedInterval> {
1269    type Error = PyErr;
1270
1271    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1272        let parsed = match &*ob.extract::<PyBackedStr>()? {
1273            "both" => ClosedInterval::Both,
1274            "left" => ClosedInterval::Left,
1275            "right" => ClosedInterval::Right,
1276            "none" => ClosedInterval::None,
1277            v => {
1278                return Err(PyValueError::new_err(format!(
1279                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1280                )));
1281            },
1282        };
1283        Ok(Wrap(parsed))
1284    }
1285}
1286
1287impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<WindowMapping> {
1288    type Error = PyErr;
1289
1290    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1291        let parsed = match &*ob.extract::<PyBackedStr>()? {
1292            "group_to_rows" => WindowMapping::GroupsToRows,
1293            "join" => WindowMapping::Join,
1294            "explode" => WindowMapping::Explode,
1295            v => {
1296                return Err(PyValueError::new_err(format!(
1297                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1298                )));
1299            },
1300        };
1301        Ok(Wrap(parsed))
1302    }
1303}
1304
1305impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinValidation> {
1306    type Error = PyErr;
1307
1308    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1309        let parsed = match &*ob.extract::<PyBackedStr>()? {
1310            "1:1" => JoinValidation::OneToOne,
1311            "1:m" => JoinValidation::OneToMany,
1312            "m:m" => JoinValidation::ManyToMany,
1313            "m:1" => JoinValidation::ManyToOne,
1314            v => {
1315                return Err(PyValueError::new_err(format!(
1316                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1317                )));
1318            },
1319        };
1320        Ok(Wrap(parsed))
1321    }
1322}
1323
1324impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MaintainOrderJoin> {
1325    type Error = PyErr;
1326
1327    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1328        let parsed = match &*ob.extract::<PyBackedStr>()? {
1329            "none" => MaintainOrderJoin::None,
1330            "left" => MaintainOrderJoin::Left,
1331            "right" => MaintainOrderJoin::Right,
1332            "left_right" => MaintainOrderJoin::LeftRight,
1333            "right_left" => MaintainOrderJoin::RightLeft,
1334            v => {
1335                return Err(PyValueError::new_err(format!(
1336                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1337                )));
1338            },
1339        };
1340        Ok(Wrap(parsed))
1341    }
1342}
1343
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuoteStyle> {
    type Error = PyErr;

    /// Maps the Python string for `quote_style` onto `QuoteStyle`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized style.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "always" => Ok(Wrap(QuoteStyle::Always)),
            "necessary" => Ok(Wrap(QuoteStyle::Necessary)),
            "non_numeric" => Ok(Wrap(QuoteStyle::NonNumeric)),
            "never" => Ok(Wrap(QuoteStyle::Never)),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        }
    }
}
1363
#[cfg(feature = "list_sets")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SetOperation> {
    type Error = PyErr;

    /// Maps the Python string naming a list set operation onto
    /// `SetOperation`.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized operation.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1383
1384// Conversion from ScanCastOptions class from the Python side.
1385impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CastColumnsPolicy> {
1386    type Error = PyErr;
1387
1388    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1389        if ob.is_none() {
1390            // Initialize the default ScanCastOptions from Python.
1391            static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();
1392
1393            let out = DEFAULT.get_or_try_init(ob.py(), || {
1394                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1395                    .unwrap()
1396                    .getattr("ScanCastOptions")
1397                    .unwrap()
1398                    .call_method0("_default")
1399                    .unwrap();
1400
1401                let out = Self::extract(ob.as_borrowed())?;
1402
1403                // The default policy should match ERROR_ON_MISMATCH (but this can change).
1404                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1405
1406                PyResult::Ok(out)
1407            })?;
1408
1409            return Ok(out.clone());
1410        }
1411
1412        let py = ob.py();
1413
1414        let integer_upcast = match &*ob
1415            .getattr(intern!(py, "integer_cast"))?
1416            .extract::<PyBackedStr>()?
1417        {
1418            "upcast" => true,
1419            "forbid" => false,
1420            v => {
1421                return Err(PyValueError::new_err(format!(
1422                    "unknown option for integer_cast: {v}"
1423                )));
1424            },
1425        };
1426
1427        let mut float_upcast = false;
1428        let mut float_downcast = false;
1429
1430        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1431
1432        parse_multiple_options("float_cast", float_cast_object, |v| {
1433            match v {
1434                "forbid" => {},
1435                "upcast" => float_upcast = true,
1436                "downcast" => float_downcast = true,
1437                v => {
1438                    return Err(PyValueError::new_err(format!(
1439                        "unknown option for float_cast: {v}"
1440                    )));
1441                },
1442            }
1443
1444            Ok(())
1445        })?;
1446
1447        let mut datetime_nanoseconds_downcast = false;
1448        let mut datetime_convert_timezone = false;
1449
1450        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1451
1452        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1453            match v {
1454                "forbid" => {},
1455                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1456                "convert-timezone" => datetime_convert_timezone = true,
1457                v => {
1458                    return Err(PyValueError::new_err(format!(
1459                        "unknown option for datetime_cast: {v}"
1460                    )));
1461                },
1462            };
1463
1464            Ok(())
1465        })?;
1466
1467        let missing_struct_fields = match &*ob
1468            .getattr(intern!(py, "missing_struct_fields"))?
1469            .extract::<PyBackedStr>()?
1470        {
1471            "insert" => MissingColumnsPolicy::Insert,
1472            "raise" => MissingColumnsPolicy::Raise,
1473            v => {
1474                return Err(PyValueError::new_err(format!(
1475                    "unknown option for missing_struct_fields: {v}"
1476                )));
1477            },
1478        };
1479
1480        let extra_struct_fields = match &*ob
1481            .getattr(intern!(py, "extra_struct_fields"))?
1482            .extract::<PyBackedStr>()?
1483        {
1484            "ignore" => ExtraColumnsPolicy::Ignore,
1485            "raise" => ExtraColumnsPolicy::Raise,
1486            v => {
1487                return Err(PyValueError::new_err(format!(
1488                    "unknown option for extra_struct_fields: {v}"
1489                )));
1490            },
1491        };
1492
1493        let categorical_to_string = match &*ob
1494            .getattr(intern!(py, "categorical_to_string"))?
1495            .extract::<PyBackedStr>()?
1496        {
1497            "allow" => true,
1498            "forbid" => false,
1499            v => {
1500                return Err(PyValueError::new_err(format!(
1501                    "unknown option for categorical_to_string: {v}"
1502                )));
1503            },
1504        };
1505
1506        return Ok(Wrap(CastColumnsPolicy {
1507            integer_upcast,
1508            float_upcast,
1509            float_downcast,
1510            datetime_nanoseconds_downcast,
1511            datetime_microseconds_downcast: false,
1512            datetime_convert_timezone,
1513            null_upcast: true,
1514            categorical_to_string,
1515            missing_struct_fields,
1516            extra_struct_fields,
1517        }));
1518
1519        fn parse_multiple_options(
1520            parameter_name: &'static str,
1521            py_object: Bound<'_, PyAny>,
1522            mut parser_func: impl FnMut(&str) -> PyResult<()>,
1523        ) -> PyResult<()> {
1524            if let Ok(v) = py_object.extract::<PyBackedStr>() {
1525                parser_func(&v)?;
1526            } else if let Ok(v) = py_object.try_iter() {
1527                for v in v {
1528                    parser_func(&v?.extract::<PyBackedStr>()?)?;
1529                }
1530            } else {
1531                return Err(PyValueError::new_err(format!(
1532                    "unknown type for {parameter_name}: {py_object}"
1533                )));
1534            }
1535
1536            Ok(())
1537        }
1538    }
1539}
1540
1541pub(crate) fn parse_fill_null_strategy(
1542    strategy: &str,
1543    limit: FillNullLimit,
1544) -> PyResult<FillNullStrategy> {
1545    let parsed = match strategy {
1546        "forward" => FillNullStrategy::Forward(limit),
1547        "backward" => FillNullStrategy::Backward(limit),
1548        "min" => FillNullStrategy::Min,
1549        "max" => FillNullStrategy::Max,
1550        "mean" => FillNullStrategy::Mean,
1551        "zero" => FillNullStrategy::Zero,
1552        "one" => FillNullStrategy::One,
1553        e => {
1554            return Err(PyValueError::new_err(format!(
1555                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1556            )));
1557        },
1558    };
1559    Ok(parsed)
1560}
1561
/// Translates a parquet compression name and optional level into a
/// `ParquetCompression`.
///
/// `compression_level` is only consulted for `'gzip'`, `'brotli'` and
/// `'zstd'`; `None` is passed through so the codec default applies.
///
/// # Errors
///
/// Raises `ValueError` when `compression` is not an accepted name, when the
/// level does not fit the codec's integer type (for example a negative level
/// for gzip or brotli), or when the codec rejects the level.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // A plain `as u8` would wrap out-of-range values (e.g. 265
                    // becomes 9) and silently accept them; reject them instead.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "gzip `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Negative levels must not be reinterpreted as huge
                    // unsigned values.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "brotli `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1602
1603pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1604where
1605    I: IntoIterator<Item = S>,
1606    S: AsRef<str>,
1607{
1608    container
1609        .into_iter()
1610        .map(|s| PlSmallStr::from_str(s.as_ref()))
1611        .collect()
1612}
1613
/// Newtype around [`CompatLevel`] used as the Python-facing argument type;
/// the wrapped level is exposed as the public field `.0`.
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);
1616
1617impl<'a, 'py> FromPyObject<'a, 'py> for PyCompatLevel {
1618    type Error = PyErr;
1619
1620    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1621        Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1622            if let Ok(compat_level) = CompatLevel::with_level(level) {
1623                compat_level
1624            } else {
1625                return Err(PyValueError::new_err("invalid compat level"));
1626            }
1627        } else if let Ok(future) = ob.extract::<bool>() {
1628            if future {
1629                CompatLevel::newest()
1630            } else {
1631                CompatLevel::oldest()
1632            }
1633        } else {
1634            return Err(PyTypeError::new_err(
1635                "'compat_level' argument accepts int or bool",
1636            ));
1637        }))
1638    }
1639}
1640
#[cfg(feature = "string_normalize")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UnicodeForm> {
    type Error = PyErr;

    /// Maps the Python string for the normalization `form` onto
    /// `UnicodeForm`; names are matched case-sensitively.
    ///
    /// # Errors
    ///
    /// Fails when `ob` is not a string, and raises `ValueError` for an
    /// unrecognized form.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "NFC" => Ok(Wrap(UnicodeForm::NFC)),
            "NFKC" => Ok(Wrap(UnicodeForm::NFKC)),
            "NFD" => Ok(Wrap(UnicodeForm::NFD)),
            "NFKD" => Ok(Wrap(UnicodeForm::NFKD)),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        }
    }
}
1660
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<KeyValueMetadata>> {
    type Error = PyErr;

    /// Reads optional parquet key-value metadata from Python.
    ///
    /// A list of `(str, str)` pairs becomes static metadata; any other
    /// object is kept as a Python handle and passed to
    /// `KeyValueMetadata::from_py_function`.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order, so the pair list wins
        // whenever the object can be extracted as one.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let key_value_metadata =
            Option::<Metadata>::extract(ob)?.map(|metadata| match metadata {
                Metadata::Static(kv) => KeyValueMetadata::from_static(kv),
                Metadata::Dynamic(func) => KeyValueMetadata::from_py_function(func),
            });
        Ok(Wrap(key_value_metadata))
    }
}
1680
1681impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<TimeZone>> {
1682    type Error = PyErr;
1683
1684    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1685        let tz = Option::<Wrap<PlSmallStr>>::extract(ob)?;
1686
1687        let tz = tz.map(|x| x.0);
1688
1689        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1690    }
1691}
1692
1693impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UpcastOrForbid> {
1694    type Error = PyErr;
1695
1696    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1697        let parsed = match &*ob.extract::<PyBackedStr>()? {
1698            "upcast" => UpcastOrForbid::Upcast,
1699            "forbid" => UpcastOrForbid::Forbid,
1700            v => {
1701                return Err(PyValueError::new_err(format!(
1702                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1703                )));
1704            },
1705        };
1706        Ok(Wrap(parsed))
1707    }
1708}
1709
1710impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ExtraColumnsPolicy> {
1711    type Error = PyErr;
1712
1713    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1714        let parsed = match &*ob.extract::<PyBackedStr>()? {
1715            "ignore" => ExtraColumnsPolicy::Ignore,
1716            "raise" => ExtraColumnsPolicy::Raise,
1717            v => {
1718                return Err(PyValueError::new_err(format!(
1719                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1720                )));
1721            },
1722        };
1723        Ok(Wrap(parsed))
1724    }
1725}
1726
1727impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicy> {
1728    type Error = PyErr;
1729
1730    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1731        let parsed = match &*ob.extract::<PyBackedStr>()? {
1732            "insert" => MissingColumnsPolicy::Insert,
1733            "raise" => MissingColumnsPolicy::Raise,
1734            v => {
1735                return Err(PyValueError::new_err(format!(
1736                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1737                )));
1738            },
1739        };
1740        Ok(Wrap(parsed))
1741    }
1742}
1743
1744impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicyOrExpr> {
1745    type Error = PyErr;
1746
1747    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1748        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1749            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1750        }
1751
1752        let parsed = match &*ob.extract::<PyBackedStr>()? {
1753            "insert" => MissingColumnsPolicyOrExpr::Insert,
1754            "raise" => MissingColumnsPolicyOrExpr::Raise,
1755            v => {
1756                return Err(PyValueError::new_err(format!(
1757                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1758                )));
1759            },
1760        };
1761        Ok(Wrap(parsed))
1762    }
1763}
1764
1765impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ColumnMapping> {
1766    type Error = PyErr;
1767
1768    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1769        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1770
1771        Ok(Wrap(match &*column_mapping_type {
1772            "iceberg-column-mapping" => {
1773                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1774                ColumnMapping::Iceberg(Arc::new(
1775                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1776                ))
1777            },
1778
1779            v => {
1780                return Err(PyValueError::new_err(format!(
1781                    "unknown column mapping type: {v}"
1782                )));
1783            },
1784        }))
1785    }
1786}
1787
1788impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DeletionFilesList> {
1789    type Error = PyErr;
1790
1791    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1792        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1793
1794        Ok(Wrap(match &*deletion_file_type {
1795            "iceberg-position-delete" => {
1796                let dict: Bound<'_, PyDict> = ob.extract()?;
1797
1798                let mut out = PlIndexMap::new();
1799
1800                for (k, v) in dict
1801                    .try_iter()?
1802                    .zip(dict.call_method0("values")?.try_iter()?)
1803                {
1804                    let k: usize = k?.extract()?;
1805                    let v: Bound<'_, PyAny> = v?.extract()?;
1806
1807                    let files = v
1808                        .try_iter()?
1809                        .map(|x| {
1810                            x.and_then(|x| {
1811                                let x: String = x.extract()?;
1812                                Ok(x)
1813                            })
1814                        })
1815                        .collect::<PyResult<Arc<[String]>>>()?;
1816
1817                    if !files.is_empty() {
1818                        out.insert(k, files);
1819                    }
1820                }
1821
1822                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1823            },
1824
1825            v => {
1826                return Err(PyValueError::new_err(format!(
1827                    "unknown deletion file type: {v}"
1828                )));
1829            },
1830        }))
1831    }
1832}
1833
1834impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DefaultFieldValues> {
1835    type Error = PyErr;
1836
1837    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1838        let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1839
1840        Ok(Wrap(match &*default_values_type {
1841            "iceberg" => {
1842                let dict: Bound<'_, PyDict> = ob.extract()?;
1843
1844                let mut out = PlIndexMap::new();
1845
1846                for (k, v) in dict
1847                    .try_iter()?
1848                    .zip(dict.call_method0("values")?.try_iter()?)
1849                {
1850                    let k: u32 = k?.extract()?;
1851                    let v = v?;
1852
1853                    let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1854                        Ok(s.into_column())
1855                    } else {
1856                        let err_msg: String = v.extract()?;
1857                        Err(err_msg)
1858                    };
1859
1860                    out.insert(k, v);
1861                }
1862
1863                DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1864                    out,
1865                )))
1866            },
1867
1868            v => {
1869                return Err(PyValueError::new_err(format!(
1870                    "unknown deletion file type: {v}"
1871                )));
1872            },
1873        }))
1874    }
1875}
1876
1877impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<PlRefPath> {
1878    type Error = PyErr;
1879
1880    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1881        if let Ok(path) = ob.extract::<PyBackedStr>() {
1882            Ok(Wrap(PlRefPath::new(&*path)))
1883        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1884            Ok(Wrap(PlRefPath::try_from_path(&path).map_err(to_py_err)?))
1885        } else {
1886            Err(PyTypeError::new_err(format!(
1887                "PlRefPath cannot be formed from '{}'",
1888                ob.get_type()
1889            ))
1890            .into())
1891        }
1892    }
1893}
1894
1895impl<'py> IntoPyObject<'py> for Wrap<PlRefPath> {
1896    type Target = PyString;
1897    type Output = Bound<'py, Self::Target>;
1898    type Error = Infallible;
1899
1900    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1901        self.0.as_str().into_pyobject(py)
1902    }
1903}