polars_python/conversion/
mod.rs

1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::default_values::{
21    DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
22};
23use polars::prelude::deletion::DeletionFilesList;
24use polars::series::ops::NullBehavior;
25use polars_core::schema::iceberg::IcebergSchema;
26use polars_core::utils::arrow::array::Array;
27use polars_core::utils::arrow::types::NativeType;
28use polars_core::utils::materialize_dyn_int;
29use polars_lazy::prelude::*;
30#[cfg(feature = "parquet")]
31use polars_parquet::write::StatisticsOptions;
32use polars_plan::dsl::ScanSources;
33use polars_utils::mmap::MemSlice;
34use polars_utils::pl_str::PlSmallStr;
35use polars_utils::total_ord::{TotalEq, TotalHash};
36use pyo3::basic::CompareOp;
37use pyo3::exceptions::{PyTypeError, PyValueError};
38use pyo3::intern;
39use pyo3::prelude::*;
40use pyo3::pybacked::PyBackedStr;
41use pyo3::sync::GILOnceCell;
42use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
43
44use crate::error::PyPolarsErr;
45use crate::expr::PyExpr;
46use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
47use crate::interop::arrow::to_rust::field_to_rust_arrow;
48#[cfg(feature = "object")]
49use crate::object::OBJECT_NAME;
50use crate::prelude::*;
51use crate::py_modules::{pl_series, polars};
52use crate::series::PySeries;
53use crate::utils::to_py_err;
54use crate::{PyDataFrame, PyLazyFrame};
55
56/// # Safety
57/// Should only be implemented for transparent types
58pub(crate) unsafe trait Transparent {
59    type Target;
60}
61
62unsafe impl Transparent for PySeries {
63    type Target = Series;
64}
65
66unsafe impl<T> Transparent for Wrap<T> {
67    type Target = T;
68}
69
70unsafe impl<T: Transparent> Transparent for Option<T> {
71    type Target = Option<T::Target>;
72}
73
74pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
75    assert_eq!(size_of::<T>(), size_of::<T::Target>());
76    assert_eq!(align_of::<T>(), align_of::<T::Target>());
77    let len = input.len();
78    let cap = input.capacity();
79    let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
80    let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
81    let ptr: *mut T::Target = vec_ptr as *mut T::Target;
82    unsafe { Vec::from_raw_parts(ptr, len, cap) }
83}
84
85pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
86    reinterpret_vec(buf)
87}
88
/// Transparent newtype around `T`, used as the target of the conversion impls
/// in this module.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T: Clone> Clone for Wrap<T> {
    /// Clones the wrapped value and wraps the copy.
    fn clone(&self) -> Self {
        let Wrap(inner) = self;
        Self(inner.clone())
    }
}

impl<T> From<T> for Wrap<T> {
    /// Wraps `t` without any further conversion.
    fn from(t: T) -> Self {
        Self(t)
    }
}
106
107// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
108pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
109    let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
110    Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
111}
112
113pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
114    let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
115    Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
116}
117
118pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
119    let s = obj.getattr(intern!(obj.py(), "_s"))?;
120    Ok(s.extract::<PySeries>()?.series.into_inner())
121}
122
123pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
124    let series = pl_series(py).bind(py);
125    let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
126    constructor.call1((s,))
127}
128
129impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
130    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
131        Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
132    }
133}
134
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    /// Accepts, in this order of preference: a single string, a sequence of
    /// strings, or a sequence of `(str, str)` pairs.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // One string.
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        // A list of strings.
        if let Ok(all) = ob.extract::<Vec<PyBackedStr>>() {
            let values = all.into_iter().map(|x| (&*x).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        // A list of string pairs.
        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let named = pairs
                .into_iter()
                .map(|(a, b)| ((&*a).into(), (&*b).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(named)));
        }

        let err = PyPolarsErr::Other("could not extract value from null_values argument".into());
        Err(err.into())
    }
}
158
159fn struct_dict<'a, 'py>(
160    py: Python<'py>,
161    vals: impl Iterator<Item = AnyValue<'a>>,
162    flds: &[Field],
163) -> PyResult<Bound<'py, PyDict>> {
164    let dict = PyDict::new(py);
165    flds.iter().zip(vals).try_for_each(|(fld, val)| {
166        dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
167    })?;
168    Ok(dict)
169}
170
171// accept u128 array to ensure alignment is correct
172fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
173    const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
174    // SAFETY: transmute is safe as there are 48 bytes in 3 128bit ints
175    // and the minimal alignment of u8 fits u16
176    let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
177    let mut buffer = itoa::Buffer::new();
178    let value = buffer.format(v);
179    let len = value.len();
180    for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
181        *dst = *src
182    }
183
184    let ptr = buf.as_mut_ptr() as *mut i128;
185    unsafe {
186        // this is safe because we know that the buffer is exactly 48 bytes long
187        *ptr -= ZEROS;
188        *ptr.add(1) -= ZEROS;
189        *ptr.add(2) -= ZEROS;
190    }
191    len
192}
193
194impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
195    type Target = PyAny;
196    type Output = Bound<'py, Self::Target>;
197    type Error = PyErr;
198
199    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
200        let pl = polars(py).bind(py);
201
202        match &self.0 {
203            DataType::Int8 => {
204                let class = pl.getattr(intern!(py, "Int8"))?;
205                class.call0()
206            },
207            DataType::Int16 => {
208                let class = pl.getattr(intern!(py, "Int16"))?;
209                class.call0()
210            },
211            DataType::Int32 => {
212                let class = pl.getattr(intern!(py, "Int32"))?;
213                class.call0()
214            },
215            DataType::Int64 => {
216                let class = pl.getattr(intern!(py, "Int64"))?;
217                class.call0()
218            },
219            DataType::UInt8 => {
220                let class = pl.getattr(intern!(py, "UInt8"))?;
221                class.call0()
222            },
223            DataType::UInt16 => {
224                let class = pl.getattr(intern!(py, "UInt16"))?;
225                class.call0()
226            },
227            DataType::UInt32 => {
228                let class = pl.getattr(intern!(py, "UInt32"))?;
229                class.call0()
230            },
231            DataType::UInt64 => {
232                let class = pl.getattr(intern!(py, "UInt64"))?;
233                class.call0()
234            },
235            DataType::Int128 => {
236                let class = pl.getattr(intern!(py, "Int128"))?;
237                class.call0()
238            },
239            DataType::Float32 => {
240                let class = pl.getattr(intern!(py, "Float32"))?;
241                class.call0()
242            },
243            DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
244                let class = pl.getattr(intern!(py, "Float64"))?;
245                class.call0()
246            },
247            DataType::Decimal(precision, scale) => {
248                let class = pl.getattr(intern!(py, "Decimal"))?;
249                let args = (*precision, *scale);
250                class.call1(args)
251            },
252            DataType::Boolean => {
253                let class = pl.getattr(intern!(py, "Boolean"))?;
254                class.call0()
255            },
256            DataType::String | DataType::Unknown(UnknownKind::Str) => {
257                let class = pl.getattr(intern!(py, "String"))?;
258                class.call0()
259            },
260            DataType::Binary => {
261                let class = pl.getattr(intern!(py, "Binary"))?;
262                class.call0()
263            },
264            DataType::Array(inner, size) => {
265                let class = pl.getattr(intern!(py, "Array"))?;
266                let inner = Wrap(*inner.clone());
267                let args = (&inner, *size);
268                class.call1(args)
269            },
270            DataType::List(inner) => {
271                let class = pl.getattr(intern!(py, "List"))?;
272                let inner = Wrap(*inner.clone());
273                class.call1((&inner,))
274            },
275            DataType::Date => {
276                let class = pl.getattr(intern!(py, "Date"))?;
277                class.call0()
278            },
279            DataType::Datetime(tu, tz) => {
280                let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
281                datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
282            },
283            DataType::Duration(tu) => {
284                let duration_class = pl.getattr(intern!(py, "Duration"))?;
285                duration_class.call1((tu.to_ascii(),))
286            },
287            #[cfg(feature = "object")]
288            DataType::Object(_) => {
289                let class = pl.getattr(intern!(py, "Object"))?;
290                class.call0()
291            },
292            DataType::Categorical(cats, _) => {
293                let categories_class = pl.getattr(intern!(py, "Categories"))?;
294                let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
295                let categories = categories_class
296                    .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
297                let kwargs = [("categories", categories)];
298                categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
299            },
300            DataType::Enum(_, mapping) => {
301                let categories = unsafe {
302                    StringChunked::from_chunks(
303                        PlSmallStr::from_static("category"),
304                        vec![mapping.to_arrow(true)],
305                    )
306                };
307                let class = pl.getattr(intern!(py, "Enum"))?;
308                let series = to_series(py, categories.into_series().into())?;
309                class.call1((series,))
310            },
311            DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
312            DataType::Struct(fields) => {
313                let field_class = pl.getattr(intern!(py, "Field"))?;
314                let iter = fields.iter().map(|fld| {
315                    let name = fld.name().as_str();
316                    let dtype = Wrap(fld.dtype().clone());
317                    field_class.call1((name, &dtype)).unwrap()
318                });
319                let fields = PyList::new(py, iter)?;
320                let struct_class = pl.getattr(intern!(py, "Struct"))?;
321                struct_class.call1((fields,))
322            },
323            DataType::Null => {
324                let class = pl.getattr(intern!(py, "Null"))?;
325                class.call0()
326            },
327            DataType::Unknown(UnknownKind::Int(v)) => {
328                Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
329            },
330            DataType::Unknown(_) => {
331                let class = pl.getattr(intern!(py, "Unknown"))?;
332                class.call0()
333            },
334            DataType::BinaryOffset => {
335                unimplemented!()
336            },
337        }
338    }
339}
340
341impl<'py> FromPyObject<'py> for Wrap<Field> {
342    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
343        let py = ob.py();
344        let name = ob
345            .getattr(intern!(py, "name"))?
346            .str()?
347            .extract::<PyBackedStr>()?;
348        let dtype = ob
349            .getattr(intern!(py, "dtype"))?
350            .extract::<Wrap<DataType>>()?;
351        Ok(Wrap(Field::new((&*name).into(), dtype.0)))
352    }
353}
354
355impl<'py> FromPyObject<'py> for Wrap<DataType> {
356    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
357        let py = ob.py();
358        let type_name = ob.get_type().qualname()?.to_string();
359
360        let dtype = match &*type_name {
361            "DataTypeClass" => {
362                // just the class, not an object
363                let name = ob
364                    .getattr(intern!(py, "__name__"))?
365                    .str()?
366                    .extract::<PyBackedStr>()?;
367                match &*name {
368                    "Int8" => DataType::Int8,
369                    "Int16" => DataType::Int16,
370                    "Int32" => DataType::Int32,
371                    "Int64" => DataType::Int64,
372                    "Int128" => DataType::Int128,
373                    "UInt8" => DataType::UInt8,
374                    "UInt16" => DataType::UInt16,
375                    "UInt32" => DataType::UInt32,
376                    "UInt64" => DataType::UInt64,
377                    "Float32" => DataType::Float32,
378                    "Float64" => DataType::Float64,
379                    "Boolean" => DataType::Boolean,
380                    "String" => DataType::String,
381                    "Binary" => DataType::Binary,
382                    "Categorical" => DataType::from_categories(Categories::global()),
383                    "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
384                    "Date" => DataType::Date,
385                    "Time" => DataType::Time,
386                    "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
387                    "Duration" => DataType::Duration(TimeUnit::Microseconds),
388                    "Decimal" => DataType::Decimal(None, None), // "none" scale => "infer"
389                    "List" => DataType::List(Box::new(DataType::Null)),
390                    "Array" => DataType::Array(Box::new(DataType::Null), 0),
391                    "Struct" => DataType::Struct(vec![]),
392                    "Null" => DataType::Null,
393                    #[cfg(feature = "object")]
394                    "Object" => DataType::Object(OBJECT_NAME),
395                    "Unknown" => DataType::Unknown(Default::default()),
396                    dt => {
397                        return Err(PyTypeError::new_err(format!(
398                            "'{dt}' is not a Polars data type",
399                        )));
400                    },
401                }
402            },
403            "Int8" => DataType::Int8,
404            "Int16" => DataType::Int16,
405            "Int32" => DataType::Int32,
406            "Int64" => DataType::Int64,
407            "Int128" => DataType::Int128,
408            "UInt8" => DataType::UInt8,
409            "UInt16" => DataType::UInt16,
410            "UInt32" => DataType::UInt32,
411            "UInt64" => DataType::UInt64,
412            "Float32" => DataType::Float32,
413            "Float64" => DataType::Float64,
414            "Boolean" => DataType::Boolean,
415            "String" => DataType::String,
416            "Binary" => DataType::Binary,
417            "Categorical" => {
418                let categories = ob.getattr(intern!(py, "categories")).unwrap();
419                let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
420                let py_categories = py_categories.extract::<PyCategories>()?;
421                DataType::from_categories(py_categories.categories().clone())
422            },
423            "Enum" => {
424                let categories = ob.getattr(intern!(py, "categories")).unwrap();
425                let s = get_series(&categories.as_borrowed())?;
426                let ca = s.str().map_err(PyPolarsErr::from)?;
427                let categories = ca.downcast_iter().next().unwrap().clone();
428                assert!(!categories.has_nulls());
429                DataType::from_frozen_categories(
430                    FrozenCategories::new(categories.values_iter()).unwrap(),
431                )
432            },
433            "Date" => DataType::Date,
434            "Time" => DataType::Time,
435            "Datetime" => {
436                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
437                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
438                let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
439                let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
440                DataType::Datetime(
441                    time_unit,
442                    TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
443                )
444            },
445            "Duration" => {
446                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
447                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
448                DataType::Duration(time_unit)
449            },
450            "Decimal" => {
451                let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
452                let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
453                DataType::Decimal(precision, Some(scale))
454            },
455            "List" => {
456                let inner = ob.getattr(intern!(py, "inner")).unwrap();
457                let inner = inner.extract::<Wrap<DataType>>()?;
458                DataType::List(Box::new(inner.0))
459            },
460            "Array" => {
461                let inner = ob.getattr(intern!(py, "inner")).unwrap();
462                let size = ob.getattr(intern!(py, "size")).unwrap();
463                let inner = inner.extract::<Wrap<DataType>>()?;
464                let size = size.extract::<usize>()?;
465                DataType::Array(Box::new(inner.0), size)
466            },
467            "Struct" => {
468                let fields = ob.getattr(intern!(py, "fields"))?;
469                let fields = fields
470                    .extract::<Vec<Wrap<Field>>>()?
471                    .into_iter()
472                    .map(|f| f.0)
473                    .collect::<Vec<Field>>();
474                DataType::Struct(fields)
475            },
476            "Null" => DataType::Null,
477            #[cfg(feature = "object")]
478            "Object" => DataType::Object(OBJECT_NAME),
479            "Unknown" => DataType::Unknown(Default::default()),
480            dt => {
481                return Err(PyTypeError::new_err(format!(
482                    "'{dt}' is not a Polars data type",
483                )));
484            },
485        };
486        Ok(Wrap(dtype))
487    }
488}
489
490enum CategoricalOrdering {
491    Lexical,
492}
493
494impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
495    type Target = PyString;
496    type Output = Bound<'py, Self::Target>;
497    type Error = Infallible;
498
499    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
500        "lexical".into_pyobject(py)
501    }
502}
503
504impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
505    type Target = PyString;
506    type Output = Bound<'py, Self::Target>;
507    type Error = Infallible;
508
509    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
510        self.0.to_ascii().into_pyobject(py)
511    }
512}
513
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    /// Reads statistics flags from a Python dict of `str -> bool`.
    ///
    /// Starts from `StatisticsOptions::empty()` and sets one flag per entry.
    /// An unrecognised key yields a `TypeError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let dict = ob.downcast::<PyDict>()?;
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in dict.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag that this key controls.
            let flag = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
540
541impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
542    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
543        let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
544        let vals = reinterpret_vec(vals);
545        Ok(Wrap(Row(vals)))
546    }
547}
548
549impl<'py> FromPyObject<'py> for Wrap<Schema> {
550    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
551        let dict = ob.downcast::<PyDict>()?;
552
553        Ok(Wrap(
554            dict.iter()
555                .map(|(key, val)| {
556                    let key = key.extract::<PyBackedStr>()?;
557                    let val = val.extract::<Wrap<DataType>>()?;
558
559                    Ok(Field::new((&*key).into(), val.0))
560                })
561                .collect::<PyResult<Schema>>()?,
562        ))
563    }
564}
565
566impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
567    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
568        let py = ob.py();
569
570        let pyarrow_schema_cls = py
571            .import(intern!(py, "pyarrow"))?
572            .getattr(intern!(py, "Schema"))?;
573
574        if ob.is_none() {
575            return Err(PyValueError::new_err("arrow_schema() returned None").into());
576        }
577
578        let schema_cls = ob.getattr(intern!(py, "__class__"))?;
579
580        if !schema_cls.is(&pyarrow_schema_cls) {
581            return Err(PyTypeError::new_err(format!(
582                "expected pyarrow.Schema, got: {schema_cls}"
583            )));
584        }
585
586        let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
587
588        let mut last_err = None;
589
590        let schema =
591            ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
592                Some(Ok(v)) => Some(v),
593                Some(Err(e)) => {
594                    last_err = Some(e);
595                    None
596                },
597                None => None,
598            }))
599            .map_err(to_py_err)?;
600
601        if let Some(last_err) = last_err {
602            return Err(last_err.into());
603        }
604
605        Ok(Wrap(schema))
606    }
607}
608
609impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
610    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
611        let list = ob.downcast::<PyList>()?.to_owned();
612
613        if list.is_empty() {
614            return Ok(Wrap(ScanSources::default()));
615        }
616
617        enum MutableSources {
618            Paths(Vec<PlPath>),
619            Files(Vec<File>),
620            Buffers(Vec<MemSlice>),
621        }
622
623        let num_items = list.len();
624        let mut iter = list
625            .into_iter()
626            .map(|val| get_python_scan_source_input(val.unbind(), false));
627
628        let Some(first) = iter.next() else {
629            return Ok(Wrap(ScanSources::default()));
630        };
631
632        let mut sources = match first? {
633            PythonScanSourceInput::Path(path) => {
634                let mut sources = Vec::with_capacity(num_items);
635                sources.push(path);
636                MutableSources::Paths(sources)
637            },
638            PythonScanSourceInput::File(file) => {
639                let mut sources = Vec::with_capacity(num_items);
640                sources.push(file.into());
641                MutableSources::Files(sources)
642            },
643            PythonScanSourceInput::Buffer(buffer) => {
644                let mut sources = Vec::with_capacity(num_items);
645                sources.push(buffer);
646                MutableSources::Buffers(sources)
647            },
648        };
649
650        for source in iter {
651            match (&mut sources, source?) {
652                (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
653                (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
654                (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
655                _ => {
656                    return Err(PyTypeError::new_err(
657                        "Cannot combine in-memory bytes, paths and files for scan sources",
658                    ));
659                },
660            }
661        }
662
663        Ok(Wrap(match sources {
664            MutableSources::Paths(i) => ScanSources::Paths(i.into()),
665            MutableSources::Files(i) => ScanSources::Files(i.into()),
666            MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
667        }))
668    }
669}
670
671impl<'py> IntoPyObject<'py> for Wrap<Schema> {
672    type Target = PyDict;
673    type Output = Bound<'py, Self::Target>;
674    type Error = PyErr;
675
676    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
677        let dict = PyDict::new(py);
678        self.0
679            .iter()
680            .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
681        Ok(dict)
682    }
683}
684
685#[derive(Debug)]
686#[repr(transparent)]
687pub struct ObjectValue {
688    pub inner: PyObject,
689}
690
691impl Clone for ObjectValue {
692    fn clone(&self) -> Self {
693        Python::with_gil(|py| Self {
694            inner: self.inner.clone_ref(py),
695        })
696    }
697}
698
699impl Hash for ObjectValue {
700    fn hash<H: Hasher>(&self, state: &mut H) {
701        let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
702        state.write_isize(h)
703    }
704}
705
706impl Eq for ObjectValue {}
707
708impl PartialEq for ObjectValue {
709    fn eq(&self, other: &Self) -> bool {
710        Python::with_gil(|py| {
711            match self
712                .inner
713                .bind(py)
714                .rich_compare(other.inner.bind(py), CompareOp::Eq)
715            {
716                Ok(result) => result.is_truthy().unwrap(),
717                Err(_) => false,
718            }
719        })
720    }
721}
722
723impl TotalEq for ObjectValue {
724    fn tot_eq(&self, other: &Self) -> bool {
725        self == other
726    }
727}
728
729impl TotalHash for ObjectValue {
730    fn tot_hash<H>(&self, state: &mut H)
731    where
732        H: Hasher,
733    {
734        self.hash(state);
735    }
736}
737
738impl Display for ObjectValue {
739    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
740        write!(f, "{}", self.inner)
741    }
742}
743
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// Type name reported for object values.
    fn type_name() -> &'static str {
        const TYPE_NAME: &str = "object";
        TYPE_NAME
    }
}
750
751impl From<PyObject> for ObjectValue {
752    fn from(p: PyObject) -> Self {
753        Self { inner: p }
754    }
755}
756
757impl<'py> FromPyObject<'py> for ObjectValue {
758    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
759        Ok(ObjectValue {
760            inner: ob.to_owned().unbind(),
761        })
762    }
763}
764
/// Reinterprets a type-erased object reference as an `&ObjectValue`.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let erased = val as *const dyn PolarsObjectSafe;
        // SAFETY: per the contract above, the value behind `val` is an `ObjectValue`.
        unsafe { &*erased.cast::<ObjectValue>() }
    }
}
774
775impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
776    type Target = PyAny;
777    type Output = Borrowed<'a, 'py, Self::Target>;
778    type Error = std::convert::Infallible;
779
780    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
781        Ok(self.inner.bind_borrowed(py))
782    }
783}
784
785impl Default for ObjectValue {
786    fn default() -> Self {
787        Python::with_gil(|py| ObjectValue { inner: py.None() })
788    }
789}
790
791impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
792    fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
793        let seq = obj.downcast::<PySequence>()?;
794        let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
795        for item in seq.try_iter()? {
796            v.push(item?.extract::<T>()?);
797        }
798        Ok(Wrap(v))
799    }
800}
801
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    /// Parses an as-of join strategy from its Python string name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
818
819impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
820    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
821        let parsed = match &*(ob.extract::<PyBackedStr>()?) {
822            "linear" => InterpolationMethod::Linear,
823            "nearest" => InterpolationMethod::Nearest,
824            v => {
825                return Err(PyValueError::new_err(format!(
826                    "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
827                )));
828            },
829        };
830        Ok(Wrap(parsed))
831    }
832}
833
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    /// Parses an Avro compression name; `"uncompressed"` maps to `None`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
850
851impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
852    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
853        let parsed = match &*ob.extract::<PyBackedStr>()? {
854            "lexical" => CategoricalOrdering::Lexical,
855            "physical" => {
856                polars_warn!(
857                    Deprecation,
858                    "physical ordering is deprecated, will use lexical ordering instead"
859                );
860                CategoricalOrdering::Lexical
861            },
862            v => {
863                return Err(PyValueError::new_err(format!(
864                    "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
865                )));
866            },
867        };
868        Ok(Wrap(parsed))
869    }
870}
871
872impl<'py> FromPyObject<'py> for Wrap<StartBy> {
873    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
874        let parsed = match &*ob.extract::<PyBackedStr>()? {
875            "window" => StartBy::WindowBound,
876            "datapoint" => StartBy::DataPoint,
877            "monday" => StartBy::Monday,
878            "tuesday" => StartBy::Tuesday,
879            "wednesday" => StartBy::Wednesday,
880            "thursday" => StartBy::Thursday,
881            "friday" => StartBy::Friday,
882            "saturday" => StartBy::Saturday,
883            "sunday" => StartBy::Sunday,
884            v => {
885                return Err(PyValueError::new_err(format!(
886                    "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
887                )));
888            },
889        };
890        Ok(Wrap(parsed))
891    }
892}
893
894impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
895    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
896        let parsed = match &*ob.extract::<PyBackedStr>()? {
897            "left" => ClosedWindow::Left,
898            "right" => ClosedWindow::Right,
899            "both" => ClosedWindow::Both,
900            "none" => ClosedWindow::None,
901            v => {
902                return Err(PyValueError::new_err(format!(
903                    "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
904                )));
905            },
906        };
907        Ok(Wrap(parsed))
908    }
909}
910
911impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
912    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
913        let parsed = match &*ob.extract::<PyBackedStr>()? {
914            "half_to_even" => RoundMode::HalfToEven,
915            "half_away_from_zero" => RoundMode::HalfAwayFromZero,
916            v => {
917                return Err(PyValueError::new_err(format!(
918                    "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
919                )));
920            },
921        };
922        Ok(Wrap(parsed))
923    }
924}
925
/// Parses the csv `encoding` argument from a Python string.
///
/// Accepts `"utf8"` and `"utf8-lossy"`; any other string raises `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let encoding = ob.extract::<PyBackedStr>()?;
        match &*encoding {
            "utf8" => Ok(Wrap(CsvEncoding::Utf8)),
            "utf8-lossy" => Ok(Wrap(CsvEncoding::LossyUtf8)),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        }
    }
}
941
/// Parses the ipc `compression` argument from a Python string.
///
/// `"uncompressed"` maps to `None`; `"lz4"` and `"zstd"` map to the matching
/// [`IpcCompression`] variant. Any other string raises `ValueError`.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let compression = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*compression {
            "uncompressed" => None,
            "lz4" => Some(IpcCompression::LZ4),
            "zstd" => Some(IpcCompression::ZSTD),
            v => {
                return Err(PyValueError::new_err(format!(
                    "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
                )));
            },
        }))
    }
}
958
959impl<'py> FromPyObject<'py> for Wrap<JoinType> {
960    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
961        let parsed = match &*ob.extract::<PyBackedStr>()? {
962            "inner" => JoinType::Inner,
963            "left" => JoinType::Left,
964            "right" => JoinType::Right,
965            "full" => JoinType::Full,
966            "semi" => JoinType::Semi,
967            "anti" => JoinType::Anti,
968            #[cfg(feature = "cross_join")]
969            "cross" => JoinType::Cross,
970            v => {
971                return Err(PyValueError::new_err(format!(
972                    "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
973                )));
974            },
975        };
976        Ok(Wrap(parsed))
977    }
978}
979
980impl<'py> FromPyObject<'py> for Wrap<Label> {
981    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
982        let parsed = match &*ob.extract::<PyBackedStr>()? {
983            "left" => Label::Left,
984            "right" => Label::Right,
985            "datapoint" => Label::DataPoint,
986            v => {
987                return Err(PyValueError::new_err(format!(
988                    "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
989                )));
990            },
991        };
992        Ok(Wrap(parsed))
993    }
994}
995
996impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
997    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
998        let parsed = match &*ob.extract::<PyBackedStr>()? {
999            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
1000            "max_width" => ListToStructWidthStrategy::MaxWidth,
1001            v => {
1002                return Err(PyValueError::new_err(format!(
1003                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1004                )));
1005            },
1006        };
1007        Ok(Wrap(parsed))
1008    }
1009}
1010
1011impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1012    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1013        let parsed = match &*ob.extract::<PyBackedStr>()? {
1014            "null" => NonExistent::Null,
1015            "raise" => NonExistent::Raise,
1016            v => {
1017                return Err(PyValueError::new_err(format!(
1018                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1019                )));
1020            },
1021        };
1022        Ok(Wrap(parsed))
1023    }
1024}
1025
1026impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1027    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1028        let parsed = match &*ob.extract::<PyBackedStr>()? {
1029            "drop" => NullBehavior::Drop,
1030            "ignore" => NullBehavior::Ignore,
1031            v => {
1032                return Err(PyValueError::new_err(format!(
1033                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1034                )));
1035            },
1036        };
1037        Ok(Wrap(parsed))
1038    }
1039}
1040
1041impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1042    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1043        let parsed = match &*ob.extract::<PyBackedStr>()? {
1044            "ignore" => NullStrategy::Ignore,
1045            "propagate" => NullStrategy::Propagate,
1046            v => {
1047                return Err(PyValueError::new_err(format!(
1048                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1049                )));
1050            },
1051        };
1052        Ok(Wrap(parsed))
1053    }
1054}
1055
/// Parses the parquet `parallel` argument from a Python string.
///
/// Accepts `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"` and `"none"`;
/// any other string raises `ValueError`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let parallel = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*parallel {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                )));
            },
        }))
    }
}
1074
1075impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1076    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1077        let parsed = match &*ob.extract::<PyBackedStr>()? {
1078            "fortran" => IndexOrder::Fortran,
1079            "c" => IndexOrder::C,
1080            v => {
1081                return Err(PyValueError::new_err(format!(
1082                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
1083                )));
1084            },
1085        };
1086        Ok(Wrap(parsed))
1087    }
1088}
1089
1090impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1091    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1092        let parsed = match &*ob.extract::<PyBackedStr>()? {
1093            "lower" => QuantileMethod::Lower,
1094            "higher" => QuantileMethod::Higher,
1095            "nearest" => QuantileMethod::Nearest,
1096            "linear" => QuantileMethod::Linear,
1097            "midpoint" => QuantileMethod::Midpoint,
1098            "equiprobable" => QuantileMethod::Equiprobable,
1099            v => {
1100                return Err(PyValueError::new_err(format!(
1101                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1102                )));
1103            },
1104        };
1105        Ok(Wrap(parsed))
1106    }
1107}
1108
1109impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1110    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1111        let parsed = match &*ob.extract::<PyBackedStr>()? {
1112            "min" => RankMethod::Min,
1113            "max" => RankMethod::Max,
1114            "average" => RankMethod::Average,
1115            "dense" => RankMethod::Dense,
1116            "ordinal" => RankMethod::Ordinal,
1117            "random" => RankMethod::Random,
1118            v => {
1119                return Err(PyValueError::new_err(format!(
1120                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1121                )));
1122            },
1123        };
1124        Ok(Wrap(parsed))
1125    }
1126}
1127
1128impl<'py> FromPyObject<'py> for Wrap<Roll> {
1129    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1130        let parsed = match &*ob.extract::<PyBackedStr>()? {
1131            "raise" => Roll::Raise,
1132            "forward" => Roll::Forward,
1133            "backward" => Roll::Backward,
1134            v => {
1135                return Err(PyValueError::new_err(format!(
1136                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1137                )));
1138            },
1139        };
1140        Ok(Wrap(parsed))
1141    }
1142}
1143
1144impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1145    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1146        let parsed = match &*ob.extract::<PyBackedStr>()? {
1147            "ns" => TimeUnit::Nanoseconds,
1148            "us" => TimeUnit::Microseconds,
1149            "ms" => TimeUnit::Milliseconds,
1150            v => {
1151                return Err(PyValueError::new_err(format!(
1152                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1153                )));
1154            },
1155        };
1156        Ok(Wrap(parsed))
1157    }
1158}
1159
1160impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1161    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1162        let parsed = match &*ob.extract::<PyBackedStr>()? {
1163            "first" => UniqueKeepStrategy::First,
1164            "last" => UniqueKeepStrategy::Last,
1165            "none" => UniqueKeepStrategy::None,
1166            "any" => UniqueKeepStrategy::Any,
1167            v => {
1168                return Err(PyValueError::new_err(format!(
1169                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1170                )));
1171            },
1172        };
1173        Ok(Wrap(parsed))
1174    }
1175}
1176
/// Parses the search-sorted `side` argument from a Python string.
///
/// Accepts `"any"`, `"left"` and `"right"`; any other string raises
/// `ValueError`.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let side = ob.extract::<PyBackedStr>()?;
        match &*side {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1193
1194impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1195    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1196        let parsed = match &*ob.extract::<PyBackedStr>()? {
1197            "both" => ClosedInterval::Both,
1198            "left" => ClosedInterval::Left,
1199            "right" => ClosedInterval::Right,
1200            "none" => ClosedInterval::None,
1201            v => {
1202                return Err(PyValueError::new_err(format!(
1203                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1204                )));
1205            },
1206        };
1207        Ok(Wrap(parsed))
1208    }
1209}
1210
1211impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1212    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1213        let parsed = match &*ob.extract::<PyBackedStr>()? {
1214            "group_to_rows" => WindowMapping::GroupsToRows,
1215            "join" => WindowMapping::Join,
1216            "explode" => WindowMapping::Explode,
1217            v => {
1218                return Err(PyValueError::new_err(format!(
1219                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1220                )));
1221            },
1222        };
1223        Ok(Wrap(parsed))
1224    }
1225}
1226
1227impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1228    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1229        let parsed = match &*ob.extract::<PyBackedStr>()? {
1230            "1:1" => JoinValidation::OneToOne,
1231            "1:m" => JoinValidation::OneToMany,
1232            "m:m" => JoinValidation::ManyToMany,
1233            "m:1" => JoinValidation::ManyToOne,
1234            v => {
1235                return Err(PyValueError::new_err(format!(
1236                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1237                )));
1238            },
1239        };
1240        Ok(Wrap(parsed))
1241    }
1242}
1243
1244impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1245    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1246        let parsed = match &*ob.extract::<PyBackedStr>()? {
1247            "none" => MaintainOrderJoin::None,
1248            "left" => MaintainOrderJoin::Left,
1249            "right" => MaintainOrderJoin::Right,
1250            "left_right" => MaintainOrderJoin::LeftRight,
1251            "right_left" => MaintainOrderJoin::RightLeft,
1252            v => {
1253                return Err(PyValueError::new_err(format!(
1254                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1255                )));
1256            },
1257        };
1258        Ok(Wrap(parsed))
1259    }
1260}
1261
/// Parses the csv `quote_style` argument from a Python string.
///
/// Accepts `"always"`, `"necessary"`, `"non_numeric"` and `"never"`; any other
/// string raises `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let quote_style = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*quote_style {
            "always" => QuoteStyle::Always,
            "necessary" => QuoteStyle::Necessary,
            "non_numeric" => QuoteStyle::NonNumeric,
            "never" => QuoteStyle::Never,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
                )));
            },
        }))
    }
}
1279
/// Builds [`CloudOptions`] for `uri` from untyped `(key, value)` string pairs.
///
/// The pairs are handed to `CloudOptions::from_untyped_config` through a
/// `dyn Iterator` reference; a failure there is converted via [`PyPolarsErr`]
/// into the returned Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    let pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    let options = CloudOptions::from_untyped_config(uri, pairs).map_err(PyPolarsErr::from)?;
    Ok(options)
}
1289
/// Parses a list set operation name from a Python string.
///
/// Accepts `"union"`, `"difference"`, `"intersection"` and
/// `"symmetric_difference"`; any other string raises `ValueError`.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let operation = ob.extract::<PyBackedStr>()?;
        match &*operation {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1307
1308// Conversion from ScanCastOptions class from the Python side.
1309impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1310    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1311        if ob.is_none() {
1312            // Initialize the default ScanCastOptions from Python.
1313            static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1314
1315            let out = DEFAULT.get_or_try_init(ob.py(), || {
1316                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1317                    .unwrap()
1318                    .getattr("ScanCastOptions")
1319                    .unwrap()
1320                    .call_method0("_default")
1321                    .unwrap();
1322
1323                let out = Self::extract_bound(&ob)?;
1324
1325                // The default policy should match ERROR_ON_MISMATCH (but this can change).
1326                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1327
1328                PyResult::Ok(out)
1329            })?;
1330
1331            return Ok(out.clone());
1332        }
1333
1334        let py = ob.py();
1335
1336        let integer_upcast = match &*ob
1337            .getattr(intern!(py, "integer_cast"))?
1338            .extract::<PyBackedStr>()?
1339        {
1340            "upcast" => true,
1341            "forbid" => false,
1342            v => {
1343                return Err(PyValueError::new_err(format!(
1344                    "unknown option for integer_cast: {v}"
1345                )));
1346            },
1347        };
1348
1349        let mut float_upcast = false;
1350        let mut float_downcast = false;
1351
1352        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1353
1354        parse_multiple_options("float_cast", float_cast_object, |v| {
1355            match v {
1356                "forbid" => {},
1357                "upcast" => float_upcast = true,
1358                "downcast" => float_downcast = true,
1359                v => {
1360                    return Err(PyValueError::new_err(format!(
1361                        "unknown option for float_cast: {v}"
1362                    )));
1363                },
1364            }
1365
1366            Ok(())
1367        })?;
1368
1369        let mut datetime_nanoseconds_downcast = false;
1370        let mut datetime_convert_timezone = false;
1371
1372        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1373
1374        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1375            match v {
1376                "forbid" => {},
1377                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1378                "convert-timezone" => datetime_convert_timezone = true,
1379                v => {
1380                    return Err(PyValueError::new_err(format!(
1381                        "unknown option for datetime_cast: {v}"
1382                    )));
1383                },
1384            };
1385
1386            Ok(())
1387        })?;
1388
1389        let missing_struct_fields = match &*ob
1390            .getattr(intern!(py, "missing_struct_fields"))?
1391            .extract::<PyBackedStr>()?
1392        {
1393            "insert" => MissingColumnsPolicy::Insert,
1394            "raise" => MissingColumnsPolicy::Raise,
1395            v => {
1396                return Err(PyValueError::new_err(format!(
1397                    "unknown option for missing_struct_fields: {v}"
1398                )));
1399            },
1400        };
1401
1402        let extra_struct_fields = match &*ob
1403            .getattr(intern!(py, "extra_struct_fields"))?
1404            .extract::<PyBackedStr>()?
1405        {
1406            "ignore" => ExtraColumnsPolicy::Ignore,
1407            "raise" => ExtraColumnsPolicy::Raise,
1408            v => {
1409                return Err(PyValueError::new_err(format!(
1410                    "unknown option for extra_struct_fields: {v}"
1411                )));
1412            },
1413        };
1414
1415        return Ok(Wrap(CastColumnsPolicy {
1416            integer_upcast,
1417            float_upcast,
1418            float_downcast,
1419            datetime_nanoseconds_downcast,
1420            datetime_microseconds_downcast: false,
1421            datetime_convert_timezone,
1422            null_upcast: true,
1423            missing_struct_fields,
1424            extra_struct_fields,
1425        }));
1426
1427        fn parse_multiple_options(
1428            parameter_name: &'static str,
1429            py_object: Bound<'_, PyAny>,
1430            mut parser_func: impl FnMut(&str) -> PyResult<()>,
1431        ) -> PyResult<()> {
1432            if let Ok(v) = py_object.extract::<PyBackedStr>() {
1433                parser_func(&v)?;
1434            } else if let Ok(v) = py_object.try_iter() {
1435                for v in v {
1436                    parser_func(&v?.extract::<PyBackedStr>()?)?;
1437                }
1438            } else {
1439                return Err(PyValueError::new_err(format!(
1440                    "unknown type for {parameter_name}: {py_object}"
1441                )));
1442            }
1443
1444            Ok(())
1445        }
1446    }
1447}
1448
1449pub(crate) fn parse_fill_null_strategy(
1450    strategy: &str,
1451    limit: FillNullLimit,
1452) -> PyResult<FillNullStrategy> {
1453    let parsed = match strategy {
1454        "forward" => FillNullStrategy::Forward(limit),
1455        "backward" => FillNullStrategy::Backward(limit),
1456        "min" => FillNullStrategy::Min,
1457        "max" => FillNullStrategy::Max,
1458        "mean" => FillNullStrategy::Mean,
1459        "zero" => FillNullStrategy::Zero,
1460        "one" => FillNullStrategy::One,
1461        e => {
1462            return Err(PyValueError::new_err(format!(
1463                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1464            )));
1465        },
1466    };
1467    Ok(parsed)
1468}
1469
/// Maps a parquet `compression` name plus optional level to a
/// [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`.
/// For gzip and brotli the level is first converted with a checked integer
/// conversion, so out-of-range or negative values raise `ValueError` instead of
/// being silently truncated or wrapped into a different level.
///
/// # Errors
/// Returns a Python `ValueError` for an unknown compression name, or for a level
/// that does not fit the codec's integer type or is rejected by its `try_new`.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // `lvl as u8` would map e.g. 265 to 9; reject it instead.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid gzip compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Negative levels must not wrap around to large `u32` values.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid brotli compression level: {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1511
1512pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1513where
1514    I: IntoIterator<Item = S>,
1515    S: AsRef<str>,
1516{
1517    container
1518        .into_iter()
1519        .map(|s| PlSmallStr::from_str(s.as_ref()))
1520        .collect()
1521}
1522
1523#[derive(Debug, Copy, Clone)]
1524pub struct PyCompatLevel(pub CompatLevel);
1525
1526impl<'py> FromPyObject<'py> for PyCompatLevel {
1527    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1528        Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1529            if let Ok(compat_level) = CompatLevel::with_level(level) {
1530                compat_level
1531            } else {
1532                return Err(PyValueError::new_err("invalid compat level"));
1533            }
1534        } else if let Ok(future) = ob.extract::<bool>() {
1535            if future {
1536                CompatLevel::newest()
1537            } else {
1538                CompatLevel::oldest()
1539            }
1540        } else {
1541            return Err(PyTypeError::new_err(
1542                "'compat_level' argument accepts int or bool",
1543            ));
1544        }))
1545    }
1546}
1547
/// Parses the unicode normalization `form` argument from a Python string.
///
/// Accepts `"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"`; any other string raises
/// `ValueError`.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let form = ob.extract::<PyBackedStr>()?;
        match &*form {
            "NFC" => Ok(Wrap(UnicodeForm::NFC)),
            "NFKC" => Ok(Wrap(UnicodeForm::NFKC)),
            "NFD" => Ok(Wrap(UnicodeForm::NFD)),
            "NFKD" => Ok(Wrap(UnicodeForm::NFKD)),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        }
    }
}
1565
/// Extracts optional parquet key-value metadata from Python.
///
/// `None` stays `None`. A list of `(str, str)` pairs becomes static metadata via
/// `KeyValueMetadata::from_static`; any other object is handed to
/// `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let key_value_metadata = match Option::<Metadata>::extract_bound(ob)? {
            None => None,
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
        };
        Ok(Wrap(key_value_metadata))
    }
}
1583
1584impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1585    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1586        let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1587
1588        let tz = tz.map(|x| x.0);
1589
1590        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1591    }
1592}
1593
1594impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1595    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1596        let parsed = match &*ob.extract::<PyBackedStr>()? {
1597            "upcast" => UpcastOrForbid::Upcast,
1598            "forbid" => UpcastOrForbid::Forbid,
1599            v => {
1600                return Err(PyValueError::new_err(format!(
1601                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1602                )));
1603            },
1604        };
1605        Ok(Wrap(parsed))
1606    }
1607}
1608
1609impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1610    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1611        let parsed = match &*ob.extract::<PyBackedStr>()? {
1612            "ignore" => ExtraColumnsPolicy::Ignore,
1613            "raise" => ExtraColumnsPolicy::Raise,
1614            v => {
1615                return Err(PyValueError::new_err(format!(
1616                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1617                )));
1618            },
1619        };
1620        Ok(Wrap(parsed))
1621    }
1622}
1623
1624impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1625    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1626        let parsed = match &*ob.extract::<PyBackedStr>()? {
1627            "insert" => MissingColumnsPolicy::Insert,
1628            "raise" => MissingColumnsPolicy::Raise,
1629            v => {
1630                return Err(PyValueError::new_err(format!(
1631                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1632                )));
1633            },
1634        };
1635        Ok(Wrap(parsed))
1636    }
1637}
1638
1639impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1640    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1641        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1642            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1643        }
1644
1645        let parsed = match &*ob.extract::<PyBackedStr>()? {
1646            "insert" => MissingColumnsPolicyOrExpr::Insert,
1647            "raise" => MissingColumnsPolicyOrExpr::Raise,
1648            v => {
1649                return Err(PyValueError::new_err(format!(
1650                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1651                )));
1652            },
1653        };
1654        Ok(Wrap(parsed))
1655    }
1656}
1657
1658impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1659    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1660        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1661
1662        Ok(Wrap(match &*column_mapping_type {
1663            "iceberg-column-mapping" => {
1664                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1665                ColumnMapping::Iceberg(Arc::new(
1666                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1667                ))
1668            },
1669
1670            v => {
1671                return Err(PyValueError::new_err(format!(
1672                    "unknown column mapping type: {v}"
1673                )));
1674            },
1675        }))
1676    }
1677}
1678
1679impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1680    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1681        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1682
1683        Ok(Wrap(match &*deletion_file_type {
1684            "iceberg-position-delete" => {
1685                let dict: Bound<'_, PyDict> = ob.extract()?;
1686
1687                let mut out = PlIndexMap::new();
1688
1689                for (k, v) in dict
1690                    .try_iter()?
1691                    .zip(dict.call_method0("values")?.try_iter()?)
1692                {
1693                    let k: usize = k?.extract()?;
1694                    let v: Bound<'_, PyAny> = v?.extract()?;
1695
1696                    let files = v
1697                        .try_iter()?
1698                        .map(|x| {
1699                            x.and_then(|x| {
1700                                let x: String = x.extract()?;
1701                                Ok(x)
1702                            })
1703                        })
1704                        .collect::<PyResult<Arc<[String]>>>()?;
1705
1706                    if !files.is_empty() {
1707                        out.insert(k, files);
1708                    }
1709                }
1710
1711                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1712            },
1713
1714            v => {
1715                return Err(PyValueError::new_err(format!(
1716                    "unknown deletion file type: {v}"
1717                )));
1718            },
1719        }))
1720    }
1721}
1722
1723impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
1724    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1725        let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1726
1727        Ok(Wrap(match &*default_values_type {
1728            "iceberg" => {
1729                let dict: Bound<'_, PyDict> = ob.extract()?;
1730
1731                let mut out = PlIndexMap::new();
1732
1733                for (k, v) in dict
1734                    .try_iter()?
1735                    .zip(dict.call_method0("values")?.try_iter()?)
1736                {
1737                    let k: u32 = k?.extract()?;
1738                    let v = v?;
1739
1740                    let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1741                        Ok(s.into_column())
1742                    } else {
1743                        let err_msg: String = v.extract()?;
1744                        Err(err_msg)
1745                    };
1746
1747                    out.insert(k, v);
1748                }
1749
1750                DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1751                    out,
1752                )))
1753            },
1754
1755            v => {
1756                return Err(PyValueError::new_err(format!(
1757                    "unknown deletion file type: {v}"
1758                )));
1759            },
1760        }))
1761    }
1762}
1763
1764impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1765    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1766        if let Ok(path) = ob.extract::<PyBackedStr>() {
1767            Ok(Wrap(PlPath::new(&path)))
1768        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1769            Ok(Wrap(PlPath::Local(path.into())))
1770        } else {
1771            Err(
1772                PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1773                    .into(),
1774            )
1775        }
1776    }
1777}
1778
1779impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1780    type Target = PyString;
1781    type Output = Bound<'py, Self::Target>;
1782    type Error = Infallible;
1783
1784    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1785        self.0.to_str().into_pyobject(py)
1786    }
1787}