polars_python/conversion/
mod.rs

1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::deletion::DeletionFilesList;
21use polars::series::ops::NullBehavior;
22use polars_core::schema::iceberg::IcebergSchema;
23use polars_core::utils::arrow::array::Array;
24use polars_core::utils::arrow::types::NativeType;
25use polars_core::utils::materialize_dyn_int;
26use polars_lazy::prelude::*;
27#[cfg(feature = "parquet")]
28use polars_parquet::write::StatisticsOptions;
29use polars_plan::dsl::ScanSources;
30use polars_utils::mmap::MemSlice;
31use polars_utils::pl_str::PlSmallStr;
32use polars_utils::total_ord::{TotalEq, TotalHash};
33use pyo3::basic::CompareOp;
34use pyo3::exceptions::{PyTypeError, PyValueError};
35use pyo3::intern;
36use pyo3::prelude::*;
37use pyo3::pybacked::PyBackedStr;
38use pyo3::sync::GILOnceCell;
39use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
40
41use crate::error::PyPolarsErr;
42use crate::expr::PyExpr;
43use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
44use crate::interop::arrow::to_rust::field_to_rust_arrow;
45#[cfg(feature = "object")]
46use crate::object::OBJECT_NAME;
47use crate::prelude::*;
48use crate::py_modules::{pl_series, polars};
49use crate::series::PySeries;
50use crate::utils::to_py_err;
51use crate::{PyDataFrame, PyLazyFrame};
52
/// Marker trait linking a wrapper type to the type it wraps, so that
/// containers of the wrapper can be reinterpreted as containers of `Target`.
///
/// # Safety
/// Should only be implemented for transparent types
pub(crate) unsafe trait Transparent {
    /// The wrapped type that `Self` is reinterpreted as.
    type Target;
}
58
59unsafe impl Transparent for PySeries {
60    type Target = Series;
61}
62
63unsafe impl<T> Transparent for Wrap<T> {
64    type Target = T;
65}
66
67unsafe impl<T: Transparent> Transparent for Option<T> {
68    type Target = Option<T::Target>;
69}
70
71pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
72    assert_eq!(size_of::<T>(), size_of::<T::Target>());
73    assert_eq!(align_of::<T>(), align_of::<T::Target>());
74    let len = input.len();
75    let cap = input.capacity();
76    let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
77    let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
78    let ptr: *mut T::Target = vec_ptr as *mut T::Target;
79    unsafe { Vec::from_raw_parts(ptr, len, cap) }
80}
81
82pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
83    reinterpret_vec(buf)
84}
85
/// Transparent newtype used to attach conversion trait impls to types that
/// are not declared in this module.
#[derive(Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T> From<T> for Wrap<T> {
    fn from(value: T) -> Self {
        Self(value)
    }
}
103
104// extract a Rust DataFrame from a python DataFrame, that is DataFrame<PyDataFrame<RustDataFrame>>
105pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
106    let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
107    Ok(pydf.extract::<PyDataFrame>()?.df)
108}
109
110pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
111    let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
112    Ok(pydf.extract::<PyLazyFrame>()?.ldf)
113}
114
115pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
116    let s = obj.getattr(intern!(obj.py(), "_s"))?;
117    Ok(s.extract::<PySeries>()?.series)
118}
119
120pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
121    let series = pl_series(py).bind(py);
122    let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
123    constructor.call1((s,))
124}
125
126impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
127    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
128        Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
129    }
130}
131
/// Extracts the `null_values` argument. Accepted shapes, tried in order:
/// a single string, a sequence of strings, or a sequence of string pairs.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(values) = ob.extract::<Vec<PyBackedStr>>() {
            let values = values.into_iter().map(|v| (&*v).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let pairs = pairs
                .into_iter()
                .map(|(first, second)| ((&*first).into(), (&*second).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(pairs)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
155
156fn struct_dict<'a, 'py>(
157    py: Python<'py>,
158    vals: impl Iterator<Item = AnyValue<'a>>,
159    flds: &[Field],
160) -> PyResult<Bound<'py, PyDict>> {
161    let dict = PyDict::new(py);
162    flds.iter().zip(vals).try_for_each(|(fld, val)| {
163        dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
164    })?;
165    Ok(dict)
166}
167
168// accept u128 array to ensure alignment is correct
169fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
170    const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
171    // SAFETY: transmute is safe as there are 48 bytes in 3 128bit ints
172    // and the minimal alignment of u8 fits u16
173    let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
174    let mut buffer = itoa::Buffer::new();
175    let value = buffer.format(v);
176    let len = value.len();
177    for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
178        *dst = *src
179    }
180
181    let ptr = buf.as_mut_ptr() as *mut i128;
182    unsafe {
183        // this is safe because we know that the buffer is exactly 48 bytes long
184        *ptr -= ZEROS;
185        *ptr.add(1) -= ZEROS;
186        *ptr.add(2) -= ZEROS;
187    }
188    len
189}
190
191impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
192    type Target = PyAny;
193    type Output = Bound<'py, Self::Target>;
194    type Error = PyErr;
195
196    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
197        let pl = polars(py).bind(py);
198
199        match &self.0 {
200            DataType::Int8 => {
201                let class = pl.getattr(intern!(py, "Int8"))?;
202                class.call0()
203            },
204            DataType::Int16 => {
205                let class = pl.getattr(intern!(py, "Int16"))?;
206                class.call0()
207            },
208            DataType::Int32 => {
209                let class = pl.getattr(intern!(py, "Int32"))?;
210                class.call0()
211            },
212            DataType::Int64 => {
213                let class = pl.getattr(intern!(py, "Int64"))?;
214                class.call0()
215            },
216            DataType::UInt8 => {
217                let class = pl.getattr(intern!(py, "UInt8"))?;
218                class.call0()
219            },
220            DataType::UInt16 => {
221                let class = pl.getattr(intern!(py, "UInt16"))?;
222                class.call0()
223            },
224            DataType::UInt32 => {
225                let class = pl.getattr(intern!(py, "UInt32"))?;
226                class.call0()
227            },
228            DataType::UInt64 => {
229                let class = pl.getattr(intern!(py, "UInt64"))?;
230                class.call0()
231            },
232            DataType::Int128 => {
233                let class = pl.getattr(intern!(py, "Int128"))?;
234                class.call0()
235            },
236            DataType::Float32 => {
237                let class = pl.getattr(intern!(py, "Float32"))?;
238                class.call0()
239            },
240            DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
241                let class = pl.getattr(intern!(py, "Float64"))?;
242                class.call0()
243            },
244            DataType::Decimal(precision, scale) => {
245                let class = pl.getattr(intern!(py, "Decimal"))?;
246                let args = (*precision, *scale);
247                class.call1(args)
248            },
249            DataType::Boolean => {
250                let class = pl.getattr(intern!(py, "Boolean"))?;
251                class.call0()
252            },
253            DataType::String | DataType::Unknown(UnknownKind::Str) => {
254                let class = pl.getattr(intern!(py, "String"))?;
255                class.call0()
256            },
257            DataType::Binary => {
258                let class = pl.getattr(intern!(py, "Binary"))?;
259                class.call0()
260            },
261            DataType::Array(inner, size) => {
262                let class = pl.getattr(intern!(py, "Array"))?;
263                let inner = Wrap(*inner.clone());
264                let args = (&inner, *size);
265                class.call1(args)
266            },
267            DataType::List(inner) => {
268                let class = pl.getattr(intern!(py, "List"))?;
269                let inner = Wrap(*inner.clone());
270                class.call1((&inner,))
271            },
272            DataType::Date => {
273                let class = pl.getattr(intern!(py, "Date"))?;
274                class.call0()
275            },
276            DataType::Datetime(tu, tz) => {
277                let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
278                datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
279            },
280            DataType::Duration(tu) => {
281                let duration_class = pl.getattr(intern!(py, "Duration"))?;
282                duration_class.call1((tu.to_ascii(),))
283            },
284            #[cfg(feature = "object")]
285            DataType::Object(_) => {
286                let class = pl.getattr(intern!(py, "Object"))?;
287                class.call0()
288            },
289            DataType::Categorical(cats, _) => {
290                let categories_class = pl.getattr(intern!(py, "Categories"))?;
291                let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
292                let categories = categories_class
293                    .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
294                let kwargs = [("categories", categories)];
295                categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
296            },
297            DataType::Enum(_, mapping) => {
298                let categories = unsafe {
299                    StringChunked::from_chunks(
300                        PlSmallStr::from_static("category"),
301                        vec![mapping.to_arrow(true)],
302                    )
303                };
304                let class = pl.getattr(intern!(py, "Enum"))?;
305                let series = to_series(py, categories.into_series().into())?;
306                class.call1((series,))
307            },
308            DataType::Time => pl.getattr(intern!(py, "Time")),
309            DataType::Struct(fields) => {
310                let field_class = pl.getattr(intern!(py, "Field"))?;
311                let iter = fields.iter().map(|fld| {
312                    let name = fld.name().as_str();
313                    let dtype = Wrap(fld.dtype().clone());
314                    field_class.call1((name, &dtype)).unwrap()
315                });
316                let fields = PyList::new(py, iter)?;
317                let struct_class = pl.getattr(intern!(py, "Struct"))?;
318                struct_class.call1((fields,))
319            },
320            DataType::Null => {
321                let class = pl.getattr(intern!(py, "Null"))?;
322                class.call0()
323            },
324            DataType::Unknown(UnknownKind::Int(v)) => {
325                Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
326            },
327            DataType::Unknown(_) => {
328                let class = pl.getattr(intern!(py, "Unknown"))?;
329                class.call0()
330            },
331            DataType::BinaryOffset => {
332                unimplemented!()
333            },
334        }
335    }
336}
337
338impl<'py> FromPyObject<'py> for Wrap<Field> {
339    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
340        let py = ob.py();
341        let name = ob
342            .getattr(intern!(py, "name"))?
343            .str()?
344            .extract::<PyBackedStr>()?;
345        let dtype = ob
346            .getattr(intern!(py, "dtype"))?
347            .extract::<Wrap<DataType>>()?;
348        Ok(Wrap(Field::new((&*name).into(), dtype.0)))
349    }
350}
351
352impl<'py> FromPyObject<'py> for Wrap<DataType> {
353    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
354        let py = ob.py();
355        let type_name = ob.get_type().qualname()?.to_string();
356
357        let dtype = match &*type_name {
358            "DataTypeClass" => {
359                // just the class, not an object
360                let name = ob
361                    .getattr(intern!(py, "__name__"))?
362                    .str()?
363                    .extract::<PyBackedStr>()?;
364                match &*name {
365                    "Int8" => DataType::Int8,
366                    "Int16" => DataType::Int16,
367                    "Int32" => DataType::Int32,
368                    "Int64" => DataType::Int64,
369                    "Int128" => DataType::Int128,
370                    "UInt8" => DataType::UInt8,
371                    "UInt16" => DataType::UInt16,
372                    "UInt32" => DataType::UInt32,
373                    "UInt64" => DataType::UInt64,
374                    "Float32" => DataType::Float32,
375                    "Float64" => DataType::Float64,
376                    "Boolean" => DataType::Boolean,
377                    "String" => DataType::String,
378                    "Binary" => DataType::Binary,
379                    "Categorical" => DataType::from_categories(Categories::global()),
380                    "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
381                    "Date" => DataType::Date,
382                    "Time" => DataType::Time,
383                    "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
384                    "Duration" => DataType::Duration(TimeUnit::Microseconds),
385                    "Decimal" => DataType::Decimal(None, None), // "none" scale => "infer"
386                    "List" => DataType::List(Box::new(DataType::Null)),
387                    "Array" => DataType::Array(Box::new(DataType::Null), 0),
388                    "Struct" => DataType::Struct(vec![]),
389                    "Null" => DataType::Null,
390                    #[cfg(feature = "object")]
391                    "Object" => DataType::Object(OBJECT_NAME),
392                    "Unknown" => DataType::Unknown(Default::default()),
393                    dt => {
394                        return Err(PyTypeError::new_err(format!(
395                            "'{dt}' is not a Polars data type",
396                        )));
397                    },
398                }
399            },
400            "Int8" => DataType::Int8,
401            "Int16" => DataType::Int16,
402            "Int32" => DataType::Int32,
403            "Int64" => DataType::Int64,
404            "Int128" => DataType::Int128,
405            "UInt8" => DataType::UInt8,
406            "UInt16" => DataType::UInt16,
407            "UInt32" => DataType::UInt32,
408            "UInt64" => DataType::UInt64,
409            "Float32" => DataType::Float32,
410            "Float64" => DataType::Float64,
411            "Boolean" => DataType::Boolean,
412            "String" => DataType::String,
413            "Binary" => DataType::Binary,
414            "Categorical" => {
415                let categories = ob.getattr(intern!(py, "categories")).unwrap();
416                let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
417                let py_categories = py_categories.extract::<PyCategories>()?;
418                DataType::from_categories(py_categories.categories().clone())
419            },
420            "Enum" => {
421                let categories = ob.getattr(intern!(py, "categories")).unwrap();
422                let s = get_series(&categories.as_borrowed())?;
423                let ca = s.str().map_err(PyPolarsErr::from)?;
424                let categories = ca.downcast_iter().next().unwrap().clone();
425                assert!(!categories.has_nulls());
426                DataType::from_frozen_categories(
427                    FrozenCategories::new(categories.values_iter()).unwrap(),
428                )
429            },
430            "Date" => DataType::Date,
431            "Time" => DataType::Time,
432            "Datetime" => {
433                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
434                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
435                let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
436                let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
437                DataType::Datetime(
438                    time_unit,
439                    TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
440                )
441            },
442            "Duration" => {
443                let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
444                let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
445                DataType::Duration(time_unit)
446            },
447            "Decimal" => {
448                let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
449                let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
450                DataType::Decimal(precision, Some(scale))
451            },
452            "List" => {
453                let inner = ob.getattr(intern!(py, "inner")).unwrap();
454                let inner = inner.extract::<Wrap<DataType>>()?;
455                DataType::List(Box::new(inner.0))
456            },
457            "Array" => {
458                let inner = ob.getattr(intern!(py, "inner")).unwrap();
459                let size = ob.getattr(intern!(py, "size")).unwrap();
460                let inner = inner.extract::<Wrap<DataType>>()?;
461                let size = size.extract::<usize>()?;
462                DataType::Array(Box::new(inner.0), size)
463            },
464            "Struct" => {
465                let fields = ob.getattr(intern!(py, "fields"))?;
466                let fields = fields
467                    .extract::<Vec<Wrap<Field>>>()?
468                    .into_iter()
469                    .map(|f| f.0)
470                    .collect::<Vec<Field>>();
471                DataType::Struct(fields)
472            },
473            "Null" => DataType::Null,
474            #[cfg(feature = "object")]
475            "Object" => DataType::Object(OBJECT_NAME),
476            "Unknown" => DataType::Unknown(Default::default()),
477            dt => {
478                return Err(PyTypeError::new_err(format!(
479                    "'{dt}' is not a Polars data type",
480                )));
481            },
482        };
483        Ok(Wrap(dtype))
484    }
485}
486
/// Ordering accepted for categoricals at the Python boundary. Only lexical
/// ordering is represented.
enum CategoricalOrdering {
    /// Exchanged with Python as the string `"lexical"`.
    Lexical,
}
490
491impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
492    type Target = PyString;
493    type Output = Bound<'py, Self::Target>;
494    type Error = Infallible;
495
496    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
497        "lexical".into_pyobject(py)
498    }
499}
500
501impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
502    type Target = PyString;
503    type Output = Bound<'py, Self::Target>;
504    type Error = Infallible;
505
506    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
507        self.0.to_ascii().into_pyobject(py)
508    }
509}
510
/// Extracts parquet statistics options from a Python dict mapping option
/// names (`min`, `max`, `distinct_count`, `null_count`) to booleans. Options
/// not present in the dict keep their `StatisticsOptions::empty()` value.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let dict = ob.downcast::<PyDict>()?;
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in dict.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Resolve which flag the key refers to, then assign once.
            let flag = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
537
538impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
539    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
540        let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
541        let vals = reinterpret_vec(vals);
542        Ok(Wrap(Row(vals)))
543    }
544}
545
546impl<'py> FromPyObject<'py> for Wrap<Schema> {
547    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
548        let dict = ob.downcast::<PyDict>()?;
549
550        Ok(Wrap(
551            dict.iter()
552                .map(|(key, val)| {
553                    let key = key.extract::<PyBackedStr>()?;
554                    let val = val.extract::<Wrap<DataType>>()?;
555
556                    Ok(Field::new((&*key).into(), val.0))
557                })
558                .collect::<PyResult<Schema>>()?,
559        ))
560    }
561}
562
563impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
564    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
565        let py = ob.py();
566
567        let pyarrow_schema_cls = py
568            .import(intern!(py, "pyarrow"))?
569            .getattr(intern!(py, "Schema"))?;
570
571        if ob.is_none() {
572            return Err(PyValueError::new_err("arrow_schema() returned None").into());
573        }
574
575        let schema_cls = ob.getattr(intern!(py, "__class__"))?;
576
577        if !schema_cls.is(&pyarrow_schema_cls) {
578            return Err(PyTypeError::new_err(format!(
579                "expected pyarrow.Schema, got: {schema_cls}"
580            )));
581        }
582
583        let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
584
585        let mut last_err = None;
586
587        let schema =
588            ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
589                Some(Ok(v)) => Some(v),
590                Some(Err(e)) => {
591                    last_err = Some(e);
592                    None
593                },
594                None => None,
595            }))
596            .map_err(to_py_err)?;
597
598        if let Some(last_err) = last_err {
599            return Err(last_err.into());
600        }
601
602        Ok(Wrap(schema))
603    }
604}
605
606impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
607    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
608        let list = ob.downcast::<PyList>()?.to_owned();
609
610        if list.is_empty() {
611            return Ok(Wrap(ScanSources::default()));
612        }
613
614        enum MutableSources {
615            Paths(Vec<PlPath>),
616            Files(Vec<File>),
617            Buffers(Vec<MemSlice>),
618        }
619
620        let num_items = list.len();
621        let mut iter = list
622            .into_iter()
623            .map(|val| get_python_scan_source_input(val.unbind(), false));
624
625        let Some(first) = iter.next() else {
626            return Ok(Wrap(ScanSources::default()));
627        };
628
629        let mut sources = match first? {
630            PythonScanSourceInput::Path(path) => {
631                let mut sources = Vec::with_capacity(num_items);
632                sources.push(path);
633                MutableSources::Paths(sources)
634            },
635            PythonScanSourceInput::File(file) => {
636                let mut sources = Vec::with_capacity(num_items);
637                sources.push(file.into());
638                MutableSources::Files(sources)
639            },
640            PythonScanSourceInput::Buffer(buffer) => {
641                let mut sources = Vec::with_capacity(num_items);
642                sources.push(buffer);
643                MutableSources::Buffers(sources)
644            },
645        };
646
647        for source in iter {
648            match (&mut sources, source?) {
649                (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
650                (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
651                (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
652                _ => {
653                    return Err(PyTypeError::new_err(
654                        "Cannot combine in-memory bytes, paths and files for scan sources",
655                    ));
656                },
657            }
658        }
659
660        Ok(Wrap(match sources {
661            MutableSources::Paths(i) => ScanSources::Paths(i.into()),
662            MutableSources::Files(i) => ScanSources::Files(i.into()),
663            MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
664        }))
665    }
666}
667
668impl<'py> IntoPyObject<'py> for Wrap<Schema> {
669    type Target = PyDict;
670    type Output = Bound<'py, Self::Target>;
671    type Error = PyErr;
672
673    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
674        let dict = PyDict::new(py);
675        self.0
676            .iter()
677            .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
678        Ok(dict)
679    }
680}
681
682#[derive(Debug)]
683#[repr(transparent)]
684pub struct ObjectValue {
685    pub inner: PyObject,
686}
687
688impl Clone for ObjectValue {
689    fn clone(&self) -> Self {
690        Python::with_gil(|py| Self {
691            inner: self.inner.clone_ref(py),
692        })
693    }
694}
695
696impl Hash for ObjectValue {
697    fn hash<H: Hasher>(&self, state: &mut H) {
698        let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
699        state.write_isize(h)
700    }
701}
702
703impl Eq for ObjectValue {}
704
705impl PartialEq for ObjectValue {
706    fn eq(&self, other: &Self) -> bool {
707        Python::with_gil(|py| {
708            match self
709                .inner
710                .bind(py)
711                .rich_compare(other.inner.bind(py), CompareOp::Eq)
712            {
713                Ok(result) => result.is_truthy().unwrap(),
714                Err(_) => false,
715            }
716        })
717    }
718}
719
720impl TotalEq for ObjectValue {
721    fn tot_eq(&self, other: &Self) -> bool {
722        self == other
723    }
724}
725
726impl TotalHash for ObjectValue {
727    fn tot_hash<H>(&self, state: &mut H)
728    where
729        H: Hasher,
730    {
731        self.hash(state);
732    }
733}
734
735impl Display for ObjectValue {
736    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
737        write!(f, "{}", self.inner)
738    }
739}
740
/// Registers `ObjectValue` as a Polars object type named `"object"`.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    fn type_name() -> &'static str {
        "object"
    }
}
747
748impl From<PyObject> for ObjectValue {
749    fn from(p: PyObject) -> Self {
750        Self { inner: p }
751    }
752}
753
754impl<'py> FromPyObject<'py> for ObjectValue {
755    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
756        Ok(ObjectValue {
757            inner: ob.to_owned().unbind(),
758        })
759    }
760}
761
/// Reinterprets a type-erased Polars object reference as an `ObjectValue`
/// reference.
///
/// # Safety
///
/// The caller is responsible for checking that val is Object otherwise UB
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // SAFETY: not checked here — per the contract above, the caller
        // guarantees that `val` really is an `ObjectValue`.
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
771
772impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
773    type Target = PyAny;
774    type Output = Borrowed<'a, 'py, Self::Target>;
775    type Error = std::convert::Infallible;
776
777    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
778        Ok(self.inner.bind_borrowed(py))
779    }
780}
781
782impl Default for ObjectValue {
783    fn default() -> Self {
784        Python::with_gil(|py| ObjectValue { inner: py.None() })
785    }
786}
787
788impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
789    fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
790        let seq = obj.downcast::<PySequence>()?;
791        let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
792        for item in seq.try_iter()? {
793            v.push(item?.extract::<T>()?);
794        }
795        Ok(Wrap(v))
796    }
797}
798
/// Parses the asof join `strategy` string.
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
815
816impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
817    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
818        let parsed = match &*(ob.extract::<PyBackedStr>()?) {
819            "linear" => InterpolationMethod::Linear,
820            "nearest" => InterpolationMethod::Nearest,
821            v => {
822                return Err(PyValueError::new_err(format!(
823                    "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
824                )));
825            },
826        };
827        Ok(Wrap(parsed))
828    }
829}
830
/// Parses the avro `compression` string; `"uncompressed"` maps to `None`.
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
847
848impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
849    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
850        let parsed = match &*ob.extract::<PyBackedStr>()? {
851            "lexical" => CategoricalOrdering::Lexical,
852            "physical" => {
853                polars_warn!(
854                    Deprecation,
855                    "physical ordering is deprecated, will use lexical ordering instead"
856                );
857                CategoricalOrdering::Lexical
858            },
859            v => {
860                return Err(PyValueError::new_err(format!(
861                    "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
862                )));
863            },
864        };
865        Ok(Wrap(parsed))
866    }
867}
868
869impl<'py> FromPyObject<'py> for Wrap<StartBy> {
870    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
871        let parsed = match &*ob.extract::<PyBackedStr>()? {
872            "window" => StartBy::WindowBound,
873            "datapoint" => StartBy::DataPoint,
874            "monday" => StartBy::Monday,
875            "tuesday" => StartBy::Tuesday,
876            "wednesday" => StartBy::Wednesday,
877            "thursday" => StartBy::Thursday,
878            "friday" => StartBy::Friday,
879            "saturday" => StartBy::Saturday,
880            "sunday" => StartBy::Sunday,
881            v => {
882                return Err(PyValueError::new_err(format!(
883                    "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
884                )));
885            },
886        };
887        Ok(Wrap(parsed))
888    }
889}
890
891impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
892    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
893        let parsed = match &*ob.extract::<PyBackedStr>()? {
894            "left" => ClosedWindow::Left,
895            "right" => ClosedWindow::Right,
896            "both" => ClosedWindow::Both,
897            "none" => ClosedWindow::None,
898            v => {
899                return Err(PyValueError::new_err(format!(
900                    "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
901                )));
902            },
903        };
904        Ok(Wrap(parsed))
905    }
906}
907
908impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
909    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
910        let parsed = match &*ob.extract::<PyBackedStr>()? {
911            "half_to_even" => RoundMode::HalfToEven,
912            "half_away_from_zero" => RoundMode::HalfAwayFromZero,
913            v => {
914                return Err(PyValueError::new_err(format!(
915                    "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
916                )));
917            },
918        };
919        Ok(Wrap(parsed))
920    }
921}
922
/// Extracts a [`CsvEncoding`] from a Python string.
///
/// Unrecognized names are rejected with `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "utf8" => Ok(Wrap(CsvEncoding::Utf8)),
            "utf8-lossy" => Ok(Wrap(CsvEncoding::LossyUtf8)),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        }
    }
}
938
/// Extracts an optional [`IpcCompression`] from a Python string.
///
/// `"uncompressed"` maps to `None`; unrecognized names are rejected with
/// `ValueError`.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let compression = match &*name {
            "uncompressed" => Ok(None),
            "lz4" => Ok(Some(IpcCompression::LZ4)),
            "zstd" => Ok(Some(IpcCompression::ZSTD)),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        };
        compression.map(Wrap)
    }
}
955
956impl<'py> FromPyObject<'py> for Wrap<JoinType> {
957    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
958        let parsed = match &*ob.extract::<PyBackedStr>()? {
959            "inner" => JoinType::Inner,
960            "left" => JoinType::Left,
961            "right" => JoinType::Right,
962            "full" => JoinType::Full,
963            "semi" => JoinType::Semi,
964            "anti" => JoinType::Anti,
965            #[cfg(feature = "cross_join")]
966            "cross" => JoinType::Cross,
967            v => {
968                return Err(PyValueError::new_err(format!(
969                    "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
970                )));
971            },
972        };
973        Ok(Wrap(parsed))
974    }
975}
976
977impl<'py> FromPyObject<'py> for Wrap<Label> {
978    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
979        let parsed = match &*ob.extract::<PyBackedStr>()? {
980            "left" => Label::Left,
981            "right" => Label::Right,
982            "datapoint" => Label::DataPoint,
983            v => {
984                return Err(PyValueError::new_err(format!(
985                    "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
986                )));
987            },
988        };
989        Ok(Wrap(parsed))
990    }
991}
992
993impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
994    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
995        let parsed = match &*ob.extract::<PyBackedStr>()? {
996            "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
997            "max_width" => ListToStructWidthStrategy::MaxWidth,
998            v => {
999                return Err(PyValueError::new_err(format!(
1000                    "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1001                )));
1002            },
1003        };
1004        Ok(Wrap(parsed))
1005    }
1006}
1007
1008impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1009    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1010        let parsed = match &*ob.extract::<PyBackedStr>()? {
1011            "null" => NonExistent::Null,
1012            "raise" => NonExistent::Raise,
1013            v => {
1014                return Err(PyValueError::new_err(format!(
1015                    "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1016                )));
1017            },
1018        };
1019        Ok(Wrap(parsed))
1020    }
1021}
1022
1023impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1024    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1025        let parsed = match &*ob.extract::<PyBackedStr>()? {
1026            "drop" => NullBehavior::Drop,
1027            "ignore" => NullBehavior::Ignore,
1028            v => {
1029                return Err(PyValueError::new_err(format!(
1030                    "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1031                )));
1032            },
1033        };
1034        Ok(Wrap(parsed))
1035    }
1036}
1037
1038impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1039    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1040        let parsed = match &*ob.extract::<PyBackedStr>()? {
1041            "ignore" => NullStrategy::Ignore,
1042            "propagate" => NullStrategy::Propagate,
1043            v => {
1044                return Err(PyValueError::new_err(format!(
1045                    "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1046                )));
1047            },
1048        };
1049        Ok(Wrap(parsed))
1050    }
1051}
1052
/// Extracts a [`ParallelStrategy`] from a Python string.
///
/// Unrecognized names are rejected with `ValueError`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let strategy = match &*name {
            "auto" => Ok(ParallelStrategy::Auto),
            "columns" => Ok(ParallelStrategy::Columns),
            "row_groups" => Ok(ParallelStrategy::RowGroups),
            "prefiltered" => Ok(ParallelStrategy::Prefiltered),
            "none" => Ok(ParallelStrategy::None),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        };
        strategy.map(Wrap)
    }
}
1071
1072impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1073    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1074        let parsed = match &*ob.extract::<PyBackedStr>()? {
1075            "fortran" => IndexOrder::Fortran,
1076            "c" => IndexOrder::C,
1077            v => {
1078                return Err(PyValueError::new_err(format!(
1079                    "`order` must be one of {{'fortran', 'c'}}, got {v}",
1080                )));
1081            },
1082        };
1083        Ok(Wrap(parsed))
1084    }
1085}
1086
1087impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1088    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1089        let parsed = match &*ob.extract::<PyBackedStr>()? {
1090            "lower" => QuantileMethod::Lower,
1091            "higher" => QuantileMethod::Higher,
1092            "nearest" => QuantileMethod::Nearest,
1093            "linear" => QuantileMethod::Linear,
1094            "midpoint" => QuantileMethod::Midpoint,
1095            "equiprobable" => QuantileMethod::Equiprobable,
1096            v => {
1097                return Err(PyValueError::new_err(format!(
1098                    "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1099                )));
1100            },
1101        };
1102        Ok(Wrap(parsed))
1103    }
1104}
1105
1106impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1107    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1108        let parsed = match &*ob.extract::<PyBackedStr>()? {
1109            "min" => RankMethod::Min,
1110            "max" => RankMethod::Max,
1111            "average" => RankMethod::Average,
1112            "dense" => RankMethod::Dense,
1113            "ordinal" => RankMethod::Ordinal,
1114            "random" => RankMethod::Random,
1115            v => {
1116                return Err(PyValueError::new_err(format!(
1117                    "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1118                )));
1119            },
1120        };
1121        Ok(Wrap(parsed))
1122    }
1123}
1124
1125impl<'py> FromPyObject<'py> for Wrap<Roll> {
1126    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1127        let parsed = match &*ob.extract::<PyBackedStr>()? {
1128            "raise" => Roll::Raise,
1129            "forward" => Roll::Forward,
1130            "backward" => Roll::Backward,
1131            v => {
1132                return Err(PyValueError::new_err(format!(
1133                    "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1134                )));
1135            },
1136        };
1137        Ok(Wrap(parsed))
1138    }
1139}
1140
1141impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1142    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1143        let parsed = match &*ob.extract::<PyBackedStr>()? {
1144            "ns" => TimeUnit::Nanoseconds,
1145            "us" => TimeUnit::Microseconds,
1146            "ms" => TimeUnit::Milliseconds,
1147            v => {
1148                return Err(PyValueError::new_err(format!(
1149                    "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1150                )));
1151            },
1152        };
1153        Ok(Wrap(parsed))
1154    }
1155}
1156
1157impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1158    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1159        let parsed = match &*ob.extract::<PyBackedStr>()? {
1160            "first" => UniqueKeepStrategy::First,
1161            "last" => UniqueKeepStrategy::Last,
1162            "none" => UniqueKeepStrategy::None,
1163            "any" => UniqueKeepStrategy::Any,
1164            v => {
1165                return Err(PyValueError::new_err(format!(
1166                    "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1167                )));
1168            },
1169        };
1170        Ok(Wrap(parsed))
1171    }
1172}
1173
/// Extracts a [`SearchSortedSide`] from a Python string.
///
/// Unrecognized names are rejected with `ValueError`.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1190
1191impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1192    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1193        let parsed = match &*ob.extract::<PyBackedStr>()? {
1194            "both" => ClosedInterval::Both,
1195            "left" => ClosedInterval::Left,
1196            "right" => ClosedInterval::Right,
1197            "none" => ClosedInterval::None,
1198            v => {
1199                return Err(PyValueError::new_err(format!(
1200                    "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1201                )));
1202            },
1203        };
1204        Ok(Wrap(parsed))
1205    }
1206}
1207
1208impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1209    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1210        let parsed = match &*ob.extract::<PyBackedStr>()? {
1211            "group_to_rows" => WindowMapping::GroupsToRows,
1212            "join" => WindowMapping::Join,
1213            "explode" => WindowMapping::Explode,
1214            v => {
1215                return Err(PyValueError::new_err(format!(
1216                    "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1217                )));
1218            },
1219        };
1220        Ok(Wrap(parsed))
1221    }
1222}
1223
1224impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1225    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1226        let parsed = match &*ob.extract::<PyBackedStr>()? {
1227            "1:1" => JoinValidation::OneToOne,
1228            "1:m" => JoinValidation::OneToMany,
1229            "m:m" => JoinValidation::ManyToMany,
1230            "m:1" => JoinValidation::ManyToOne,
1231            v => {
1232                return Err(PyValueError::new_err(format!(
1233                    "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1234                )));
1235            },
1236        };
1237        Ok(Wrap(parsed))
1238    }
1239}
1240
1241impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1242    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1243        let parsed = match &*ob.extract::<PyBackedStr>()? {
1244            "none" => MaintainOrderJoin::None,
1245            "left" => MaintainOrderJoin::Left,
1246            "right" => MaintainOrderJoin::Right,
1247            "left_right" => MaintainOrderJoin::LeftRight,
1248            "right_left" => MaintainOrderJoin::RightLeft,
1249            v => {
1250                return Err(PyValueError::new_err(format!(
1251                    "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1252                )));
1253            },
1254        };
1255        Ok(Wrap(parsed))
1256    }
1257}
1258
/// Extracts a [`QuoteStyle`] from a Python string.
///
/// Unrecognized names are rejected with `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let style = match &*name {
            "always" => Ok(QuoteStyle::Always),
            "necessary" => Ok(QuoteStyle::Necessary),
            "non_numeric" => Ok(QuoteStyle::NonNumeric),
            "never" => Ok(QuoteStyle::Never),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        };
        style.map(Wrap)
    }
}
1276
/// Builds [`CloudOptions`] for `uri` from untyped key/value configuration pairs.
///
/// Errors from `CloudOptions::from_untyped_config` are routed through
/// [`PyPolarsErr`] before being returned as a Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut entries = kv.into_iter();
    // Hand the iterator over as a trait object, exactly as a `&mut dyn Iterator`.
    let entries: &mut dyn Iterator<Item = _> = &mut entries;
    Ok(CloudOptions::from_untyped_config(uri, entries).map_err(PyPolarsErr::from)?)
}
1286
/// Extracts a [`SetOperation`] from a Python string.
///
/// Unrecognized names are rejected with `ValueError`.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1304
1305// Conversion from ScanCastOptions class from the Python side.
1306impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1307    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1308        if ob.is_none() {
1309            // Initialize the default ScanCastOptions from Python.
1310            static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1311
1312            let out = DEFAULT.get_or_try_init(ob.py(), || {
1313                let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1314                    .unwrap()
1315                    .getattr("ScanCastOptions")
1316                    .unwrap()
1317                    .call_method0("_default")
1318                    .unwrap();
1319
1320                let out = Self::extract_bound(&ob)?;
1321
1322                // The default policy should match ERROR_ON_MISMATCH (but this can change).
1323                debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1324
1325                PyResult::Ok(out)
1326            })?;
1327
1328            return Ok(out.clone());
1329        }
1330
1331        let py = ob.py();
1332
1333        let integer_upcast = match &*ob
1334            .getattr(intern!(py, "integer_cast"))?
1335            .extract::<PyBackedStr>()?
1336        {
1337            "upcast" => true,
1338            "forbid" => false,
1339            v => {
1340                return Err(PyValueError::new_err(format!(
1341                    "unknown option for integer_cast: {v}"
1342                )));
1343            },
1344        };
1345
1346        let mut float_upcast = false;
1347        let mut float_downcast = false;
1348
1349        let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1350
1351        parse_multiple_options("float_cast", float_cast_object, |v| {
1352            match v {
1353                "forbid" => {},
1354                "upcast" => float_upcast = true,
1355                "downcast" => float_downcast = true,
1356                v => {
1357                    return Err(PyValueError::new_err(format!(
1358                        "unknown option for float_cast: {v}"
1359                    )));
1360                },
1361            }
1362
1363            Ok(())
1364        })?;
1365
1366        let mut datetime_nanoseconds_downcast = false;
1367        let mut datetime_convert_timezone = false;
1368
1369        let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1370
1371        parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1372            match v {
1373                "forbid" => {},
1374                "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1375                "convert-timezone" => datetime_convert_timezone = true,
1376                v => {
1377                    return Err(PyValueError::new_err(format!(
1378                        "unknown option for datetime_cast: {v}"
1379                    )));
1380                },
1381            };
1382
1383            Ok(())
1384        })?;
1385
1386        let missing_struct_fields = match &*ob
1387            .getattr(intern!(py, "missing_struct_fields"))?
1388            .extract::<PyBackedStr>()?
1389        {
1390            "insert" => MissingColumnsPolicy::Insert,
1391            "raise" => MissingColumnsPolicy::Raise,
1392            v => {
1393                return Err(PyValueError::new_err(format!(
1394                    "unknown option for missing_struct_fields: {v}"
1395                )));
1396            },
1397        };
1398
1399        let extra_struct_fields = match &*ob
1400            .getattr(intern!(py, "extra_struct_fields"))?
1401            .extract::<PyBackedStr>()?
1402        {
1403            "ignore" => ExtraColumnsPolicy::Ignore,
1404            "raise" => ExtraColumnsPolicy::Raise,
1405            v => {
1406                return Err(PyValueError::new_err(format!(
1407                    "unknown option for extra_struct_fields: {v}"
1408                )));
1409            },
1410        };
1411
1412        return Ok(Wrap(CastColumnsPolicy {
1413            integer_upcast,
1414            float_upcast,
1415            float_downcast,
1416            datetime_nanoseconds_downcast,
1417            datetime_microseconds_downcast: false,
1418            datetime_convert_timezone,
1419            missing_struct_fields,
1420            extra_struct_fields,
1421        }));
1422
1423        fn parse_multiple_options(
1424            parameter_name: &'static str,
1425            py_object: Bound<'_, PyAny>,
1426            mut parser_func: impl FnMut(&str) -> PyResult<()>,
1427        ) -> PyResult<()> {
1428            if let Ok(v) = py_object.extract::<PyBackedStr>() {
1429                parser_func(&v)?;
1430            } else if let Ok(v) = py_object.try_iter() {
1431                for v in v {
1432                    parser_func(&v?.extract::<PyBackedStr>()?)?;
1433                }
1434            } else {
1435                return Err(PyValueError::new_err(format!(
1436                    "unknown type for {parameter_name}: {py_object}"
1437                )));
1438            }
1439
1440            Ok(())
1441        }
1442    }
1443}
1444
1445pub(crate) fn parse_fill_null_strategy(
1446    strategy: &str,
1447    limit: FillNullLimit,
1448) -> PyResult<FillNullStrategy> {
1449    let parsed = match strategy {
1450        "forward" => FillNullStrategy::Forward(limit),
1451        "backward" => FillNullStrategy::Backward(limit),
1452        "min" => FillNullStrategy::Min,
1453        "max" => FillNullStrategy::Max,
1454        "mean" => FillNullStrategy::Mean,
1455        "zero" => FillNullStrategy::Zero,
1456        "one" => FillNullStrategy::One,
1457        e => {
1458            return Err(PyValueError::new_err(format!(
1459                "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1460            )));
1461        },
1462    };
1463    Ok(parsed)
1464}
1465
/// Maps a compression name and optional level onto a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and
/// `"zstd"`; when it is `None` the codec is built without an explicit level.
///
/// # Errors
///
/// Returns `ValueError` when `compression` is not a known codec name, when the
/// level does not fit the codec's integer type, or when the codec's own
/// `try_new` validation rejects the level.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    /// Narrows the user-supplied level without wrapping. An `as` cast would
    /// silently turn e.g. 256 into 0 and let it pass the codec's range check.
    fn checked_level<T: TryFrom<i32>>(codec: &str, lvl: i32) -> PyResult<T> {
        T::try_from(lvl).map_err(|_| {
            PyValueError::new_err(format!(
                "{codec} `compression_level` is out of range, got {lvl}"
            ))
        })
    }

    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    checked_level::<u8>("gzip", lvl).and_then(|lvl| {
                        GzipLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    checked_level::<u32>("brotli", lvl).and_then(|lvl| {
                        BrotliLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1507
1508pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1509where
1510    I: IntoIterator<Item = S>,
1511    S: AsRef<str>,
1512{
1513    container
1514        .into_iter()
1515        .map(|s| PlSmallStr::from_str(s.as_ref()))
1516        .collect()
1517}
1518
/// Newtype around [`CompatLevel`] used to receive a compatibility level from
/// Python.
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);
1521
1522impl<'py> FromPyObject<'py> for PyCompatLevel {
1523    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1524        Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1525            if let Ok(compat_level) = CompatLevel::with_level(level) {
1526                compat_level
1527            } else {
1528                return Err(PyValueError::new_err("invalid compat level"));
1529            }
1530        } else if let Ok(future) = ob.extract::<bool>() {
1531            if future {
1532                CompatLevel::newest()
1533            } else {
1534                CompatLevel::oldest()
1535            }
1536        } else {
1537            return Err(PyTypeError::new_err(
1538                "'compat_level' argument accepts int or bool",
1539            ));
1540        }))
1541    }
1542}
1543
/// Extracts a [`UnicodeForm`] from a Python string.
///
/// Matching is case-sensitive; unrecognized names are rejected with
/// `ValueError`.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let form = match &*name {
            "NFC" => Ok(UnicodeForm::NFC),
            "NFKC" => Ok(UnicodeForm::NFKC),
            "NFD" => Ok(UnicodeForm::NFD),
            "NFKD" => Ok(UnicodeForm::NFKD),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        };
        form.map(Wrap)
    }
}
1561
/// Extracts optional [`KeyValueMetadata`] from a Python object.
///
/// `None` stays `None`. A value that extracts as a list of string pairs
/// becomes static metadata; any other object is kept as a Python object and
/// handed to `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let key_value_metadata = match Option::<Metadata>::extract_bound(ob)? {
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
            None => None,
        };
        Ok(Wrap(key_value_metadata))
    }
}
1579
1580impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1581    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1582        let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1583
1584        let tz = tz.map(|x| x.0);
1585
1586        Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1587    }
1588}
1589
1590impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1591    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1592        let parsed = match &*ob.extract::<PyBackedStr>()? {
1593            "upcast" => UpcastOrForbid::Upcast,
1594            "forbid" => UpcastOrForbid::Forbid,
1595            v => {
1596                return Err(PyValueError::new_err(format!(
1597                    "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1598                )));
1599            },
1600        };
1601        Ok(Wrap(parsed))
1602    }
1603}
1604
1605impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1606    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1607        let parsed = match &*ob.extract::<PyBackedStr>()? {
1608            "ignore" => ExtraColumnsPolicy::Ignore,
1609            "raise" => ExtraColumnsPolicy::Raise,
1610            v => {
1611                return Err(PyValueError::new_err(format!(
1612                    "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1613                )));
1614            },
1615        };
1616        Ok(Wrap(parsed))
1617    }
1618}
1619
1620impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1621    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1622        let parsed = match &*ob.extract::<PyBackedStr>()? {
1623            "insert" => MissingColumnsPolicy::Insert,
1624            "raise" => MissingColumnsPolicy::Raise,
1625            v => {
1626                return Err(PyValueError::new_err(format!(
1627                    "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1628                )));
1629            },
1630        };
1631        Ok(Wrap(parsed))
1632    }
1633}
1634
1635impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1636    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1637        if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1638            return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1639        }
1640
1641        let parsed = match &*ob.extract::<PyBackedStr>()? {
1642            "insert" => MissingColumnsPolicyOrExpr::Insert,
1643            "raise" => MissingColumnsPolicyOrExpr::Raise,
1644            v => {
1645                return Err(PyValueError::new_err(format!(
1646                    "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1647                )));
1648            },
1649        };
1650        Ok(Wrap(parsed))
1651    }
1652}
1653
1654impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1655    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1656        let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1657
1658        Ok(Wrap(match &*column_mapping_type {
1659            "iceberg-column-mapping" => {
1660                let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1661                ColumnMapping::Iceberg(Arc::new(
1662                    IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1663                ))
1664            },
1665
1666            v => {
1667                return Err(PyValueError::new_err(format!(
1668                    "unknown column mapping type: {v}"
1669                )));
1670            },
1671        }))
1672    }
1673}
1674
1675impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1676    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1677        let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1678
1679        Ok(Wrap(match &*deletion_file_type {
1680            "iceberg-position-delete" => {
1681                let dict: Bound<'_, PyDict> = ob.extract()?;
1682
1683                let mut out = PlIndexMap::new();
1684
1685                for (k, v) in dict
1686                    .try_iter()?
1687                    .zip(dict.call_method0("values")?.try_iter()?)
1688                {
1689                    let k: usize = k?.extract()?;
1690                    let v: Bound<'_, PyAny> = v?.extract()?;
1691
1692                    let files = v
1693                        .try_iter()?
1694                        .map(|x| {
1695                            x.and_then(|x| {
1696                                let x: String = x.extract()?;
1697                                Ok(x)
1698                            })
1699                        })
1700                        .collect::<PyResult<Arc<[String]>>>()?;
1701
1702                    if !files.is_empty() {
1703                        out.insert(k, files);
1704                    }
1705                }
1706
1707                DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1708            },
1709
1710            v => {
1711                return Err(PyValueError::new_err(format!(
1712                    "unknown deletion file type: {v}"
1713                )));
1714            },
1715        }))
1716    }
1717}
1718
1719impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1720    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1721        if let Ok(path) = ob.extract::<PyBackedStr>() {
1722            Ok(Wrap(PlPath::new(&path)))
1723        } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1724            Ok(Wrap(PlPath::Local(path.into())))
1725        } else {
1726            Err(
1727                PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1728                    .into(),
1729            )
1730        }
1731    }
1732}
1733
1734impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1735    type Target = PyString;
1736    type Output = Bound<'py, Self::Target>;
1737    type Error = Infallible;
1738
1739    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1740        self.0.to_str().into_pyobject(py)
1741    }
1742}