1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::default_values::{
21 DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
22};
23use polars::prelude::deletion::DeletionFilesList;
24use polars::series::ops::NullBehavior;
25use polars_compute::decimal::dec128_verify_prec_scale;
26use polars_core::schema::iceberg::IcebergSchema;
27use polars_core::utils::arrow::array::Array;
28use polars_core::utils::arrow::types::NativeType;
29use polars_core::utils::materialize_dyn_int;
30use polars_lazy::prelude::*;
31#[cfg(feature = "parquet")]
32use polars_parquet::write::StatisticsOptions;
33use polars_plan::dsl::ScanSources;
34use polars_utils::mmap::MemSlice;
35use polars_utils::pl_str::PlSmallStr;
36use polars_utils::total_ord::{TotalEq, TotalHash};
37use pyo3::basic::CompareOp;
38use pyo3::exceptions::{PyTypeError, PyValueError};
39use pyo3::intern;
40use pyo3::prelude::*;
41use pyo3::pybacked::PyBackedStr;
42use pyo3::sync::PyOnceLock;
43use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
44
45use crate::error::PyPolarsErr;
46use crate::expr::PyExpr;
47use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
48use crate::interop::arrow::to_rust::field_to_rust_arrow;
49#[cfg(feature = "object")]
50use crate::object::OBJECT_NAME;
51use crate::prelude::*;
52use crate::py_modules::{pl_series, polars};
53use crate::series::PySeries;
54use crate::utils::to_py_err;
55use crate::{PyDataFrame, PyLazyFrame};
56
/// Marker for types whose values may be reinterpreted in place as `Self::Target`.
///
/// # Safety
///
/// `reinterpret_vec` turns a `Vec<Self>` into a `Vec<Self::Target>` by casting
/// the buffer pointer, so an implementor must have the same size, alignment
/// and in-memory representation as `Self::Target`.
pub(crate) unsafe trait Transparent {
    type Target;
}
62
// SAFETY(review): relies on `PySeries` being a layout-compatible wrapper around
// `Series`; its definition lives outside this module — TODO confirm.
unsafe impl Transparent for PySeries {
    type Target = Series;
}
66
// SAFETY: `Wrap<T>` is declared `#[repr(transparent)]` over its single field of type `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
70
// SAFETY(review): assumes `Option<T>` and `Option<T::Target>` share a layout
// whenever `T` and `T::Target` do. `reinterpret_vec` only re-checks size and
// alignment at runtime — TODO confirm this holds for every implementor.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
74
75pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
76 assert_eq!(size_of::<T>(), size_of::<T::Target>());
77 assert_eq!(align_of::<T>(), align_of::<T::Target>());
78 let len = input.len();
79 let cap = input.capacity();
80 let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
81 let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
82 let ptr: *mut T::Target = vec_ptr as *mut T::Target;
83 unsafe { Vec::from_raw_parts(ptr, len, cap) }
84}
85
86pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
87 reinterpret_vec(buf)
88}
89
/// Newtype around `T` on which this module hangs its Python conversion impls
/// (`FromPyObject` / `IntoPyObject`) for types such as `DataType` or `Schema`.
///
/// `#[repr(transparent)]` gives it the same layout as `T`, which
/// `reinterpret_vec` relies on when unwrapping whole vectors.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
93
94impl<T> Clone for Wrap<T>
95where
96 T: Clone,
97{
98 fn clone(&self) -> Self {
99 Wrap(self.0.clone())
100 }
101}
102impl<T> From<T> for Wrap<T> {
103 fn from(t: T) -> Self {
104 Wrap(t)
105 }
106}
107
108pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
110 let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
111 Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
112}
113
114pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
115 let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
116 Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
117}
118
119pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
120 let s = obj.getattr(intern!(obj.py(), "_s"))?;
121 Ok(s.extract::<PySeries>()?.series.into_inner())
122}
123
124pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
125 let series = pl_series(py).bind(py);
126 let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
127 constructor.call1((s,))
128}
129
130impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
131 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
132 Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
133 }
134}
135
/// Extracts the `null_values` argument, trying three shapes in order:
/// a single string, a sequence of strings, then a sequence of string pairs.
///
/// # Errors
///
/// Returns `PyPolarsErr::Other` when none of the three shapes matches.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // One null marker applied to every column.
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        // Several null markers applied to every column.
        if let Ok(markers) = ob.extract::<Vec<PyBackedStr>>() {
            let markers = markers.into_iter().map(|m| (&*m).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(markers)));
        }

        // Pairs of strings, forwarded to `NullValues::Named`.
        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let pairs = pairs
                .into_iter()
                .map(|(first, second)| ((&*first).into(), (&*second).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(pairs)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
159
160fn struct_dict<'a, 'py>(
161 py: Python<'py>,
162 vals: impl Iterator<Item = AnyValue<'a>>,
163 flds: &[Field],
164) -> PyResult<Bound<'py, PyDict>> {
165 let dict = PyDict::new(py);
166 flds.iter().zip(vals).try_for_each(|(fld, val)| {
167 dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
168 })?;
169 Ok(dict)
170}
171
172impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
173 type Target = PyAny;
174 type Output = Bound<'py, Self::Target>;
175 type Error = PyErr;
176
177 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
178 let pl = polars(py).bind(py);
179
180 match &self.0 {
181 DataType::Int8 => {
182 let class = pl.getattr(intern!(py, "Int8"))?;
183 class.call0()
184 },
185 DataType::Int16 => {
186 let class = pl.getattr(intern!(py, "Int16"))?;
187 class.call0()
188 },
189 DataType::Int32 => {
190 let class = pl.getattr(intern!(py, "Int32"))?;
191 class.call0()
192 },
193 DataType::Int64 => {
194 let class = pl.getattr(intern!(py, "Int64"))?;
195 class.call0()
196 },
197 DataType::UInt8 => {
198 let class = pl.getattr(intern!(py, "UInt8"))?;
199 class.call0()
200 },
201 DataType::UInt16 => {
202 let class = pl.getattr(intern!(py, "UInt16"))?;
203 class.call0()
204 },
205 DataType::UInt32 => {
206 let class = pl.getattr(intern!(py, "UInt32"))?;
207 class.call0()
208 },
209 DataType::UInt64 => {
210 let class = pl.getattr(intern!(py, "UInt64"))?;
211 class.call0()
212 },
213 DataType::UInt128 => {
214 let class = pl.getattr(intern!(py, "UInt128"))?;
215 class.call0()
216 },
217 DataType::Int128 => {
218 let class = pl.getattr(intern!(py, "Int128"))?;
219 class.call0()
220 },
221 DataType::Float32 => {
222 let class = pl.getattr(intern!(py, "Float32"))?;
223 class.call0()
224 },
225 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
226 let class = pl.getattr(intern!(py, "Float64"))?;
227 class.call0()
228 },
229 DataType::Decimal(precision, scale) => {
230 let class = pl.getattr(intern!(py, "Decimal"))?;
231 let args = (*precision, *scale);
232 class.call1(args)
233 },
234 DataType::Boolean => {
235 let class = pl.getattr(intern!(py, "Boolean"))?;
236 class.call0()
237 },
238 DataType::String | DataType::Unknown(UnknownKind::Str) => {
239 let class = pl.getattr(intern!(py, "String"))?;
240 class.call0()
241 },
242 DataType::Binary => {
243 let class = pl.getattr(intern!(py, "Binary"))?;
244 class.call0()
245 },
246 DataType::Array(inner, size) => {
247 let class = pl.getattr(intern!(py, "Array"))?;
248 let inner = Wrap(*inner.clone());
249 let args = (&inner, *size);
250 class.call1(args)
251 },
252 DataType::List(inner) => {
253 let class = pl.getattr(intern!(py, "List"))?;
254 let inner = Wrap(*inner.clone());
255 class.call1((&inner,))
256 },
257 DataType::Date => {
258 let class = pl.getattr(intern!(py, "Date"))?;
259 class.call0()
260 },
261 DataType::Datetime(tu, tz) => {
262 let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
263 datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
264 },
265 DataType::Duration(tu) => {
266 let duration_class = pl.getattr(intern!(py, "Duration"))?;
267 duration_class.call1((tu.to_ascii(),))
268 },
269 #[cfg(feature = "object")]
270 DataType::Object(_) => {
271 let class = pl.getattr(intern!(py, "Object"))?;
272 class.call0()
273 },
274 DataType::Categorical(cats, _) => {
275 let categories_class = pl.getattr(intern!(py, "Categories"))?;
276 let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
277 let categories = categories_class
278 .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
279 let kwargs = [("categories", categories)];
280 categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
281 },
282 DataType::Enum(_, mapping) => {
283 let categories = unsafe {
284 StringChunked::from_chunks(
285 PlSmallStr::from_static("category"),
286 vec![mapping.to_arrow(true)],
287 )
288 };
289 let class = pl.getattr(intern!(py, "Enum"))?;
290 let series = to_series(py, categories.into_series().into())?;
291 class.call1((series,))
292 },
293 DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
294 DataType::Struct(fields) => {
295 let field_class = pl.getattr(intern!(py, "Field"))?;
296 let iter = fields.iter().map(|fld| {
297 let name = fld.name().as_str();
298 let dtype = Wrap(fld.dtype().clone());
299 field_class.call1((name, &dtype)).unwrap()
300 });
301 let fields = PyList::new(py, iter)?;
302 let struct_class = pl.getattr(intern!(py, "Struct"))?;
303 struct_class.call1((fields,))
304 },
305 DataType::Null => {
306 let class = pl.getattr(intern!(py, "Null"))?;
307 class.call0()
308 },
309 DataType::Unknown(UnknownKind::Int(v)) => {
310 Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
311 },
312 DataType::Unknown(_) => {
313 let class = pl.getattr(intern!(py, "Unknown"))?;
314 class.call0()
315 },
316 DataType::BinaryOffset => {
317 unimplemented!()
318 },
319 }
320 }
321}
322
323impl<'py> FromPyObject<'py> for Wrap<Field> {
324 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
325 let py = ob.py();
326 let name = ob
327 .getattr(intern!(py, "name"))?
328 .str()?
329 .extract::<PyBackedStr>()?;
330 let dtype = ob
331 .getattr(intern!(py, "dtype"))?
332 .extract::<Wrap<DataType>>()?;
333 Ok(Wrap(Field::new((&*name).into(), dtype.0)))
334 }
335}
336
337impl<'py> FromPyObject<'py> for Wrap<DataType> {
338 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
339 let py = ob.py();
340 let type_name = ob.get_type().qualname()?.to_string();
341
342 let dtype = match &*type_name {
343 "DataTypeClass" => {
344 let name = ob
346 .getattr(intern!(py, "__name__"))?
347 .str()?
348 .extract::<PyBackedStr>()?;
349 match &*name {
350 "Int8" => DataType::Int8,
351 "Int16" => DataType::Int16,
352 "Int32" => DataType::Int32,
353 "Int64" => DataType::Int64,
354 "Int128" => DataType::Int128,
355 "UInt8" => DataType::UInt8,
356 "UInt16" => DataType::UInt16,
357 "UInt32" => DataType::UInt32,
358 "UInt64" => DataType::UInt64,
359 "UInt128" => DataType::UInt128,
360 "Float32" => DataType::Float32,
361 "Float64" => DataType::Float64,
362 "Boolean" => DataType::Boolean,
363 "String" => DataType::String,
364 "Binary" => DataType::Binary,
365 "Categorical" => DataType::from_categories(Categories::global()),
366 "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
367 "Date" => DataType::Date,
368 "Time" => DataType::Time,
369 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
370 "Duration" => DataType::Duration(TimeUnit::Microseconds),
371 "List" => DataType::List(Box::new(DataType::Null)),
372 "Array" => DataType::Array(Box::new(DataType::Null), 0),
373 "Struct" => DataType::Struct(vec![]),
374 "Null" => DataType::Null,
375 #[cfg(feature = "object")]
376 "Object" => DataType::Object(OBJECT_NAME),
377 "Unknown" => DataType::Unknown(Default::default()),
378 "Decimal" => {
379 return Err(PyTypeError::new_err(
380 "Decimal without precision/scale set is not a valid Polars datatype",
381 ));
382 },
383 dt => {
384 return Err(PyTypeError::new_err(format!(
385 "'{dt}' is not a Polars data type",
386 )));
387 },
388 }
389 },
390 "Int8" => DataType::Int8,
391 "Int16" => DataType::Int16,
392 "Int32" => DataType::Int32,
393 "Int64" => DataType::Int64,
394 "Int128" => DataType::Int128,
395 "UInt8" => DataType::UInt8,
396 "UInt16" => DataType::UInt16,
397 "UInt32" => DataType::UInt32,
398 "UInt64" => DataType::UInt64,
399 "UInt128" => DataType::UInt128,
400 "Float32" => DataType::Float32,
401 "Float64" => DataType::Float64,
402 "Boolean" => DataType::Boolean,
403 "String" => DataType::String,
404 "Binary" => DataType::Binary,
405 "Categorical" => {
406 let categories = ob.getattr(intern!(py, "categories")).unwrap();
407 let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
408 let py_categories = py_categories.extract::<PyCategories>()?;
409 DataType::from_categories(py_categories.categories().clone())
410 },
411 "Enum" => {
412 let categories = ob.getattr(intern!(py, "categories")).unwrap();
413 let s = get_series(&categories.as_borrowed())?;
414 let ca = s.str().map_err(PyPolarsErr::from)?;
415 let categories = ca.downcast_iter().next().unwrap().clone();
416 assert!(!categories.has_nulls());
417 DataType::from_frozen_categories(
418 FrozenCategories::new(categories.values_iter()).unwrap(),
419 )
420 },
421 "Date" => DataType::Date,
422 "Time" => DataType::Time,
423 "Datetime" => {
424 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
425 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
426 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
427 let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
428 DataType::Datetime(
429 time_unit,
430 TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
431 )
432 },
433 "Duration" => {
434 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
435 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
436 DataType::Duration(time_unit)
437 },
438 "Decimal" => {
439 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
440 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
441 dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;
442 DataType::Decimal(precision, scale)
443 },
444 "List" => {
445 let inner = ob.getattr(intern!(py, "inner")).unwrap();
446 let inner = inner.extract::<Wrap<DataType>>()?;
447 DataType::List(Box::new(inner.0))
448 },
449 "Array" => {
450 let inner = ob.getattr(intern!(py, "inner")).unwrap();
451 let size = ob.getattr(intern!(py, "size")).unwrap();
452 let inner = inner.extract::<Wrap<DataType>>()?;
453 let size = size.extract::<usize>()?;
454 DataType::Array(Box::new(inner.0), size)
455 },
456 "Struct" => {
457 let fields = ob.getattr(intern!(py, "fields"))?;
458 let fields = fields
459 .extract::<Vec<Wrap<Field>>>()?
460 .into_iter()
461 .map(|f| f.0)
462 .collect::<Vec<Field>>();
463 DataType::Struct(fields)
464 },
465 "Null" => DataType::Null,
466 #[cfg(feature = "object")]
467 "Object" => DataType::Object(OBJECT_NAME),
468 "Unknown" => DataType::Unknown(Default::default()),
469 dt => {
470 return Err(PyTypeError::new_err(format!(
471 "'{dt}' is not a Polars data type",
472 )));
473 },
474 };
475 Ok(Wrap(dtype))
476 }
477}
478
/// Ordering accepted for categorical data at the Python boundary.
///
/// `Lexical` is the only variant; the `FromPyObject` impl in this file maps
/// both `"lexical"` and the deprecated `"physical"` to it.
enum CategoricalOrdering {
    Lexical,
}
482
483impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
484 type Target = PyString;
485 type Output = Bound<'py, Self::Target>;
486 type Error = Infallible;
487
488 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
489 "lexical".into_pyobject(py)
490 }
491}
492
493impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
494 type Target = PyString;
495 type Output = Bound<'py, Self::Target>;
496 type Error = Infallible;
497
498 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
499 self.0.to_ascii().into_pyobject(py)
500 }
501}
502
/// Builds `StatisticsOptions` from a Python `dict` mapping option names to
/// booleans. Starts from `StatisticsOptions::empty()` and sets only the keys
/// that are present.
///
/// # Errors
///
/// * `ob` is not a `dict`, a key is not a string, or a value is not a `bool`.
/// * `TypeError` for any key other than `min`, `max`, `distinct_count`,
///   `null_count`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();

        let dict = ob.downcast::<PyDict>()?;
        for (key, val) in dict {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag this key controls, then assign it once.
            let flag = match key.as_ref() {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
529
530impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
531 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
532 let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
533 let vals = reinterpret_vec(vals);
534 Ok(Wrap(Row(vals)))
535 }
536}
537
538impl<'py> FromPyObject<'py> for Wrap<Schema> {
539 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
540 let dict = ob.downcast::<PyDict>()?;
541
542 Ok(Wrap(
543 dict.iter()
544 .map(|(key, val)| {
545 let key = key.extract::<PyBackedStr>()?;
546 let val = val.extract::<Wrap<DataType>>()?;
547
548 Ok(Field::new((&*key).into(), val.0))
549 })
550 .collect::<PyResult<Schema>>()?,
551 ))
552 }
553}
554
555impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
556 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
557 let py = ob.py();
558
559 let pyarrow_schema_cls = py
560 .import(intern!(py, "pyarrow"))?
561 .getattr(intern!(py, "Schema"))?;
562
563 if ob.is_none() {
564 return Err(PyValueError::new_err("arrow_schema() returned None").into());
565 }
566
567 let schema_cls = ob.getattr(intern!(py, "__class__"))?;
568
569 if !schema_cls.is(&pyarrow_schema_cls) {
570 return Err(PyTypeError::new_err(format!(
571 "expected pyarrow.Schema, got: {schema_cls}"
572 )));
573 }
574
575 let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
576
577 let mut last_err = None;
578
579 let schema =
580 ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
581 Some(Ok(v)) => Some(v),
582 Some(Err(e)) => {
583 last_err = Some(e);
584 None
585 },
586 None => None,
587 }))
588 .map_err(to_py_err)?;
589
590 if let Some(last_err) = last_err {
591 return Err(last_err.into());
592 }
593
594 Ok(Wrap(schema))
595 }
596}
597
598impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
599 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
600 let list = ob.downcast::<PyList>()?.to_owned();
601
602 if list.is_empty() {
603 return Ok(Wrap(ScanSources::default()));
604 }
605
606 enum MutableSources {
607 Paths(Vec<PlPath>),
608 Files(Vec<File>),
609 Buffers(Vec<MemSlice>),
610 }
611
612 let num_items = list.len();
613 let mut iter = list
614 .into_iter()
615 .map(|val| get_python_scan_source_input(val.unbind(), false));
616
617 let Some(first) = iter.next() else {
618 return Ok(Wrap(ScanSources::default()));
619 };
620
621 let mut sources = match first? {
622 PythonScanSourceInput::Path(path) => {
623 let mut sources = Vec::with_capacity(num_items);
624 sources.push(path);
625 MutableSources::Paths(sources)
626 },
627 PythonScanSourceInput::File(file) => {
628 let mut sources = Vec::with_capacity(num_items);
629 sources.push(file.into());
630 MutableSources::Files(sources)
631 },
632 PythonScanSourceInput::Buffer(buffer) => {
633 let mut sources = Vec::with_capacity(num_items);
634 sources.push(buffer);
635 MutableSources::Buffers(sources)
636 },
637 };
638
639 for source in iter {
640 match (&mut sources, source?) {
641 (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
642 (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
643 (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
644 _ => {
645 return Err(PyTypeError::new_err(
646 "Cannot combine in-memory bytes, paths and files for scan sources",
647 ));
648 },
649 }
650 }
651
652 Ok(Wrap(match sources {
653 MutableSources::Paths(i) => ScanSources::Paths(i.into()),
654 MutableSources::Files(i) => ScanSources::Files(i.into()),
655 MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
656 }))
657 }
658}
659
660impl<'py> IntoPyObject<'py> for Wrap<Schema> {
661 type Target = PyDict;
662 type Output = Bound<'py, Self::Target>;
663 type Error = PyErr;
664
665 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
666 let dict = PyDict::new(py);
667 self.0
668 .iter()
669 .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
670 Ok(dict)
671 }
672}
673
/// An owned handle to an arbitrary Python object.
///
/// The trait impls in this file delegate to Python: hashing calls the object's
/// Python hash, equality uses Python `==`, and `Default` is Python `None`.
/// `#[repr(transparent)]` makes the struct layout-identical to `Py<PyAny>`.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    pub inner: Py<PyAny>,
}
679
680impl Clone for ObjectValue {
681 fn clone(&self) -> Self {
682 Python::attach(|py| Self {
683 inner: self.inner.clone_ref(py),
684 })
685 }
686}
687
688impl Hash for ObjectValue {
689 fn hash<H: Hasher>(&self, state: &mut H) {
690 let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));
691 state.write_isize(h)
692 }
693}
694
// NOTE(review): equality is delegated to Python `==` (see the `PartialEq` impl),
// so the reflexivity that `Eq` promises depends on the wrapped object's `__eq__`.
impl Eq for ObjectValue {}
696
697impl PartialEq for ObjectValue {
698 fn eq(&self, other: &Self) -> bool {
699 Python::attach(|py| {
700 match self
701 .inner
702 .bind(py)
703 .rich_compare(other.inner.bind(py), CompareOp::Eq)
704 {
705 Ok(result) => result.is_truthy().unwrap(),
706 Err(_) => false,
707 }
708 })
709 }
710}
711
712impl TotalEq for ObjectValue {
713 fn tot_eq(&self, other: &Self) -> bool {
714 self == other
715 }
716}
717
718impl TotalHash for ObjectValue {
719 fn tot_hash<H>(&self, state: &mut H)
720 where
721 H: Hasher,
722 {
723 self.hash(state);
724 }
725}
726
727impl Display for ObjectValue {
728 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
729 write!(f, "{}", self.inner)
730 }
731}
732
/// Registers `ObjectValue` as a Polars object type named `"object"`.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    fn type_name() -> &'static str {
        "object"
    }
}
739
740impl From<Py<PyAny>> for ObjectValue {
741 fn from(p: Py<PyAny>) -> Self {
742 Self { inner: p }
743 }
744}
745
746impl<'py> FromPyObject<'py> for ObjectValue {
747 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
748 Ok(ObjectValue {
749 inner: ob.to_owned().unbind(),
750 })
751 }
752}
753
// NOTE(review): this conversion is only sound if every `PolarsObjectSafe`
// trait object passed in is actually backed by an `ObjectValue`; nothing here
// checks that — confirm at the call sites.
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // Discards the vtable half of the fat pointer and reborrows the data
        // pointer as an `ObjectValue`.
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
763
764impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
765 type Target = PyAny;
766 type Output = Borrowed<'a, 'py, Self::Target>;
767 type Error = std::convert::Infallible;
768
769 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
770 Ok(self.inner.bind_borrowed(py))
771 }
772}
773
774impl Default for ObjectValue {
775 fn default() -> Self {
776 Python::attach(|py| ObjectValue { inner: py.None() })
777 }
778}
779
780impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
781 fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
782 let seq = obj.downcast::<PySequence>()?;
783 let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
784 for item in seq.try_iter()? {
785 v.push(item?.extract::<T>()?);
786 }
787 Ok(Wrap(v))
788 }
789}
790
/// Parses `"backward"`, `"forward"` or `"nearest"` into an `AsofStrategy`;
/// any other string is rejected with a `ValueError`.
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
807
808impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
809 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
810 let parsed = match &*(ob.extract::<PyBackedStr>()?) {
811 "linear" => InterpolationMethod::Linear,
812 "nearest" => InterpolationMethod::Nearest,
813 v => {
814 return Err(PyValueError::new_err(format!(
815 "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
816 )));
817 },
818 };
819 Ok(Wrap(parsed))
820 }
821}
822
/// Parses an Avro compression name. `"uncompressed"` maps to `None`;
/// `"snappy"` and `"deflate"` map to the matching codec; any other string is
/// rejected with a `ValueError`.
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
839
840impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
841 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
842 let parsed = match &*ob.extract::<PyBackedStr>()? {
843 "lexical" => CategoricalOrdering::Lexical,
844 "physical" => {
845 polars_warn!(
846 Deprecation,
847 "physical ordering is deprecated, will use lexical ordering instead"
848 );
849 CategoricalOrdering::Lexical
850 },
851 v => {
852 return Err(PyValueError::new_err(format!(
853 "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
854 )));
855 },
856 };
857 Ok(Wrap(parsed))
858 }
859}
860
861impl<'py> FromPyObject<'py> for Wrap<StartBy> {
862 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
863 let parsed = match &*ob.extract::<PyBackedStr>()? {
864 "window" => StartBy::WindowBound,
865 "datapoint" => StartBy::DataPoint,
866 "monday" => StartBy::Monday,
867 "tuesday" => StartBy::Tuesday,
868 "wednesday" => StartBy::Wednesday,
869 "thursday" => StartBy::Thursday,
870 "friday" => StartBy::Friday,
871 "saturday" => StartBy::Saturday,
872 "sunday" => StartBy::Sunday,
873 v => {
874 return Err(PyValueError::new_err(format!(
875 "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
876 )));
877 },
878 };
879 Ok(Wrap(parsed))
880 }
881}
882
883impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
884 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
885 let parsed = match &*ob.extract::<PyBackedStr>()? {
886 "left" => ClosedWindow::Left,
887 "right" => ClosedWindow::Right,
888 "both" => ClosedWindow::Both,
889 "none" => ClosedWindow::None,
890 v => {
891 return Err(PyValueError::new_err(format!(
892 "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
893 )));
894 },
895 };
896 Ok(Wrap(parsed))
897 }
898}
899
900impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
901 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
902 let parsed = match &*ob.extract::<PyBackedStr>()? {
903 "half_to_even" => RoundMode::HalfToEven,
904 "half_away_from_zero" => RoundMode::HalfAwayFromZero,
905 v => {
906 return Err(PyValueError::new_err(format!(
907 "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
908 )));
909 },
910 };
911 Ok(Wrap(parsed))
912 }
913}
914
/// Parses `"utf8"` or `"utf8-lossy"` into a `CsvEncoding`; any other string
/// is rejected with a `ValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "utf8" => Ok(Wrap(CsvEncoding::Utf8)),
            "utf8-lossy" => Ok(Wrap(CsvEncoding::LossyUtf8)),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        }
    }
}
930
/// Parses an IPC compression name. `"uncompressed"` maps to `None`; `"lz4"`
/// maps to LZ4; `"zstd"` maps to ZSTD with its default setting; any other
/// string is rejected with a `ValueError`.
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "lz4" => Ok(Wrap(Some(IpcCompression::LZ4))),
            "zstd" => Ok(Wrap(Some(IpcCompression::ZSTD(Default::default())))),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        }
    }
}
947
948impl<'py> FromPyObject<'py> for Wrap<JoinType> {
949 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
950 let parsed = match &*ob.extract::<PyBackedStr>()? {
951 "inner" => JoinType::Inner,
952 "left" => JoinType::Left,
953 "right" => JoinType::Right,
954 "full" => JoinType::Full,
955 "semi" => JoinType::Semi,
956 "anti" => JoinType::Anti,
957 #[cfg(feature = "cross_join")]
958 "cross" => JoinType::Cross,
959 v => {
960 return Err(PyValueError::new_err(format!(
961 "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
962 )));
963 },
964 };
965 Ok(Wrap(parsed))
966 }
967}
968
969impl<'py> FromPyObject<'py> for Wrap<Label> {
970 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
971 let parsed = match &*ob.extract::<PyBackedStr>()? {
972 "left" => Label::Left,
973 "right" => Label::Right,
974 "datapoint" => Label::DataPoint,
975 v => {
976 return Err(PyValueError::new_err(format!(
977 "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
978 )));
979 },
980 };
981 Ok(Wrap(parsed))
982 }
983}
984
985impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
986 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
987 let parsed = match &*ob.extract::<PyBackedStr>()? {
988 "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
989 "max_width" => ListToStructWidthStrategy::MaxWidth,
990 v => {
991 return Err(PyValueError::new_err(format!(
992 "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
993 )));
994 },
995 };
996 Ok(Wrap(parsed))
997 }
998}
999
1000impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1001 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1002 let parsed = match &*ob.extract::<PyBackedStr>()? {
1003 "null" => NonExistent::Null,
1004 "raise" => NonExistent::Raise,
1005 v => {
1006 return Err(PyValueError::new_err(format!(
1007 "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1008 )));
1009 },
1010 };
1011 Ok(Wrap(parsed))
1012 }
1013}
1014
1015impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1016 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1017 let parsed = match &*ob.extract::<PyBackedStr>()? {
1018 "drop" => NullBehavior::Drop,
1019 "ignore" => NullBehavior::Ignore,
1020 v => {
1021 return Err(PyValueError::new_err(format!(
1022 "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1023 )));
1024 },
1025 };
1026 Ok(Wrap(parsed))
1027 }
1028}
1029
1030impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1031 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1032 let parsed = match &*ob.extract::<PyBackedStr>()? {
1033 "ignore" => NullStrategy::Ignore,
1034 "propagate" => NullStrategy::Propagate,
1035 v => {
1036 return Err(PyValueError::new_err(format!(
1037 "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1038 )));
1039 },
1040 };
1041 Ok(Wrap(parsed))
1042 }
1043}
1044
/// Parses `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"` or `"none"`
/// into a `ParallelStrategy`; any other string is rejected with a
/// `ValueError`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "auto" => Ok(Wrap(ParallelStrategy::Auto)),
            "columns" => Ok(Wrap(ParallelStrategy::Columns)),
            "row_groups" => Ok(Wrap(ParallelStrategy::RowGroups)),
            "prefiltered" => Ok(Wrap(ParallelStrategy::Prefiltered)),
            "none" => Ok(Wrap(ParallelStrategy::None)),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        }
    }
}
1063
1064impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1065 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1066 let parsed = match &*ob.extract::<PyBackedStr>()? {
1067 "fortran" => IndexOrder::Fortran,
1068 "c" => IndexOrder::C,
1069 v => {
1070 return Err(PyValueError::new_err(format!(
1071 "`order` must be one of {{'fortran', 'c'}}, got {v}",
1072 )));
1073 },
1074 };
1075 Ok(Wrap(parsed))
1076 }
1077}
1078
1079impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1080 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1081 let parsed = match &*ob.extract::<PyBackedStr>()? {
1082 "lower" => QuantileMethod::Lower,
1083 "higher" => QuantileMethod::Higher,
1084 "nearest" => QuantileMethod::Nearest,
1085 "linear" => QuantileMethod::Linear,
1086 "midpoint" => QuantileMethod::Midpoint,
1087 "equiprobable" => QuantileMethod::Equiprobable,
1088 v => {
1089 return Err(PyValueError::new_err(format!(
1090 "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1091 )));
1092 },
1093 };
1094 Ok(Wrap(parsed))
1095 }
1096}
1097
1098impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1099 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1100 let parsed = match &*ob.extract::<PyBackedStr>()? {
1101 "min" => RankMethod::Min,
1102 "max" => RankMethod::Max,
1103 "average" => RankMethod::Average,
1104 "dense" => RankMethod::Dense,
1105 "ordinal" => RankMethod::Ordinal,
1106 "random" => RankMethod::Random,
1107 v => {
1108 return Err(PyValueError::new_err(format!(
1109 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1110 )));
1111 },
1112 };
1113 Ok(Wrap(parsed))
1114 }
1115}
1116
1117impl<'py> FromPyObject<'py> for Wrap<RollingRankMethod> {
1118 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1119 let parsed = match &*ob.extract::<PyBackedStr>()? {
1120 "min" => RollingRankMethod::Min,
1121 "max" => RollingRankMethod::Max,
1122 "average" => RollingRankMethod::Average,
1123 "dense" => RollingRankMethod::Dense,
1124 "random" => RollingRankMethod::Random,
1125 v => {
1126 return Err(PyValueError::new_err(format!(
1127 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1128 )));
1129 },
1130 };
1131 Ok(Wrap(parsed))
1132 }
1133}
1134
1135impl<'py> FromPyObject<'py> for Wrap<Roll> {
1136 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1137 let parsed = match &*ob.extract::<PyBackedStr>()? {
1138 "raise" => Roll::Raise,
1139 "forward" => Roll::Forward,
1140 "backward" => Roll::Backward,
1141 v => {
1142 return Err(PyValueError::new_err(format!(
1143 "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1144 )));
1145 },
1146 };
1147 Ok(Wrap(parsed))
1148 }
1149}
1150
1151impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1152 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1153 let parsed = match &*ob.extract::<PyBackedStr>()? {
1154 "ns" => TimeUnit::Nanoseconds,
1155 "us" => TimeUnit::Microseconds,
1156 "ms" => TimeUnit::Milliseconds,
1157 v => {
1158 return Err(PyValueError::new_err(format!(
1159 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1160 )));
1161 },
1162 };
1163 Ok(Wrap(parsed))
1164 }
1165}
1166
1167impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1168 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1169 let parsed = match &*ob.extract::<PyBackedStr>()? {
1170 "first" => UniqueKeepStrategy::First,
1171 "last" => UniqueKeepStrategy::Last,
1172 "none" => UniqueKeepStrategy::None,
1173 "any" => UniqueKeepStrategy::Any,
1174 v => {
1175 return Err(PyValueError::new_err(format!(
1176 "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1177 )));
1178 },
1179 };
1180 Ok(Wrap(parsed))
1181 }
1182}
1183
/// Parses the Python-side sorted `side` string into a [`SearchSortedSide`].
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    /// Accepts `'any'`, `'left'` or `'right'`. Any other string produces a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1200
1201impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1202 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1203 let parsed = match &*ob.extract::<PyBackedStr>()? {
1204 "both" => ClosedInterval::Both,
1205 "left" => ClosedInterval::Left,
1206 "right" => ClosedInterval::Right,
1207 "none" => ClosedInterval::None,
1208 v => {
1209 return Err(PyValueError::new_err(format!(
1210 "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1211 )));
1212 },
1213 };
1214 Ok(Wrap(parsed))
1215 }
1216}
1217
1218impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1219 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1220 let parsed = match &*ob.extract::<PyBackedStr>()? {
1221 "group_to_rows" => WindowMapping::GroupsToRows,
1222 "join" => WindowMapping::Join,
1223 "explode" => WindowMapping::Explode,
1224 v => {
1225 return Err(PyValueError::new_err(format!(
1226 "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1227 )));
1228 },
1229 };
1230 Ok(Wrap(parsed))
1231 }
1232}
1233
1234impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1235 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1236 let parsed = match &*ob.extract::<PyBackedStr>()? {
1237 "1:1" => JoinValidation::OneToOne,
1238 "1:m" => JoinValidation::OneToMany,
1239 "m:m" => JoinValidation::ManyToMany,
1240 "m:1" => JoinValidation::ManyToOne,
1241 v => {
1242 return Err(PyValueError::new_err(format!(
1243 "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1244 )));
1245 },
1246 };
1247 Ok(Wrap(parsed))
1248 }
1249}
1250
1251impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1252 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1253 let parsed = match &*ob.extract::<PyBackedStr>()? {
1254 "none" => MaintainOrderJoin::None,
1255 "left" => MaintainOrderJoin::Left,
1256 "right" => MaintainOrderJoin::Right,
1257 "left_right" => MaintainOrderJoin::LeftRight,
1258 "right_left" => MaintainOrderJoin::RightLeft,
1259 v => {
1260 return Err(PyValueError::new_err(format!(
1261 "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1262 )));
1263 },
1264 };
1265 Ok(Wrap(parsed))
1266 }
1267}
1268
/// Parses the Python-side `quote_style` string into a [`QuoteStyle`].
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    /// Accepts `'always'`, `'necessary'`, `'non_numeric'` or `'never'`.
    /// Any other string produces a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let style = match &*name {
            "always" => Ok(QuoteStyle::Always),
            "necessary" => Ok(QuoteStyle::Necessary),
            "non_numeric" => Ok(QuoteStyle::NonNumeric),
            "never" => Ok(QuoteStyle::Never),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        };
        style.map(Wrap)
    }
}
1286
/// Builds [`CloudOptions`] for `uri` from untyped string key/value pairs.
///
/// The scheme is derived from `uri` via `CloudScheme::from_uri`, and the pairs are handed
/// to `CloudOptions::from_untyped_config`. A failure there is converted through
/// [`PyPolarsErr`] into the returned Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    // Type-erase the iterator, exactly as the callee is given it elsewhere in this function's
    // original form.
    let pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    let scheme = CloudScheme::from_uri(uri);
    let options =
        CloudOptions::from_untyped_config(scheme.as_ref(), pairs).map_err(PyPolarsErr::from)?;
    Ok(options)
}
1297
/// Parses a Python-side set operation name into a [`SetOperation`].
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    /// Accepts `'union'`, `'difference'`, `'intersection'` or `'symmetric_difference'`.
    /// Any other string produces a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1315
1316impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1318 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1319 if ob.is_none() {
1320 static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();
1322
1323 let out = DEFAULT.get_or_try_init(ob.py(), || {
1324 let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1325 .unwrap()
1326 .getattr("ScanCastOptions")
1327 .unwrap()
1328 .call_method0("_default")
1329 .unwrap();
1330
1331 let out = Self::extract_bound(&ob)?;
1332
1333 debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1335
1336 PyResult::Ok(out)
1337 })?;
1338
1339 return Ok(out.clone());
1340 }
1341
1342 let py = ob.py();
1343
1344 let integer_upcast = match &*ob
1345 .getattr(intern!(py, "integer_cast"))?
1346 .extract::<PyBackedStr>()?
1347 {
1348 "upcast" => true,
1349 "forbid" => false,
1350 v => {
1351 return Err(PyValueError::new_err(format!(
1352 "unknown option for integer_cast: {v}"
1353 )));
1354 },
1355 };
1356
1357 let mut float_upcast = false;
1358 let mut float_downcast = false;
1359
1360 let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1361
1362 parse_multiple_options("float_cast", float_cast_object, |v| {
1363 match v {
1364 "forbid" => {},
1365 "upcast" => float_upcast = true,
1366 "downcast" => float_downcast = true,
1367 v => {
1368 return Err(PyValueError::new_err(format!(
1369 "unknown option for float_cast: {v}"
1370 )));
1371 },
1372 }
1373
1374 Ok(())
1375 })?;
1376
1377 let mut datetime_nanoseconds_downcast = false;
1378 let mut datetime_convert_timezone = false;
1379
1380 let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1381
1382 parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1383 match v {
1384 "forbid" => {},
1385 "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1386 "convert-timezone" => datetime_convert_timezone = true,
1387 v => {
1388 return Err(PyValueError::new_err(format!(
1389 "unknown option for datetime_cast: {v}"
1390 )));
1391 },
1392 };
1393
1394 Ok(())
1395 })?;
1396
1397 let missing_struct_fields = match &*ob
1398 .getattr(intern!(py, "missing_struct_fields"))?
1399 .extract::<PyBackedStr>()?
1400 {
1401 "insert" => MissingColumnsPolicy::Insert,
1402 "raise" => MissingColumnsPolicy::Raise,
1403 v => {
1404 return Err(PyValueError::new_err(format!(
1405 "unknown option for missing_struct_fields: {v}"
1406 )));
1407 },
1408 };
1409
1410 let extra_struct_fields = match &*ob
1411 .getattr(intern!(py, "extra_struct_fields"))?
1412 .extract::<PyBackedStr>()?
1413 {
1414 "ignore" => ExtraColumnsPolicy::Ignore,
1415 "raise" => ExtraColumnsPolicy::Raise,
1416 v => {
1417 return Err(PyValueError::new_err(format!(
1418 "unknown option for extra_struct_fields: {v}"
1419 )));
1420 },
1421 };
1422
1423 let categorical_to_string = match &*ob
1424 .getattr(intern!(py, "categorical_to_string"))?
1425 .extract::<PyBackedStr>()?
1426 {
1427 "allow" => true,
1428 "forbid" => false,
1429 v => {
1430 return Err(PyValueError::new_err(format!(
1431 "unknown option for categorical_to_string: {v}"
1432 )));
1433 },
1434 };
1435
1436 return Ok(Wrap(CastColumnsPolicy {
1437 integer_upcast,
1438 float_upcast,
1439 float_downcast,
1440 datetime_nanoseconds_downcast,
1441 datetime_microseconds_downcast: false,
1442 datetime_convert_timezone,
1443 null_upcast: true,
1444 categorical_to_string,
1445 missing_struct_fields,
1446 extra_struct_fields,
1447 }));
1448
1449 fn parse_multiple_options(
1450 parameter_name: &'static str,
1451 py_object: Bound<'_, PyAny>,
1452 mut parser_func: impl FnMut(&str) -> PyResult<()>,
1453 ) -> PyResult<()> {
1454 if let Ok(v) = py_object.extract::<PyBackedStr>() {
1455 parser_func(&v)?;
1456 } else if let Ok(v) = py_object.try_iter() {
1457 for v in v {
1458 parser_func(&v?.extract::<PyBackedStr>()?)?;
1459 }
1460 } else {
1461 return Err(PyValueError::new_err(format!(
1462 "unknown type for {parameter_name}: {py_object}"
1463 )));
1464 }
1465
1466 Ok(())
1467 }
1468 }
1469}
1470
1471pub(crate) fn parse_fill_null_strategy(
1472 strategy: &str,
1473 limit: FillNullLimit,
1474) -> PyResult<FillNullStrategy> {
1475 let parsed = match strategy {
1476 "forward" => FillNullStrategy::Forward(limit),
1477 "backward" => FillNullStrategy::Backward(limit),
1478 "min" => FillNullStrategy::Min,
1479 "max" => FillNullStrategy::Max,
1480 "mean" => FillNullStrategy::Mean,
1481 "zero" => FillNullStrategy::Zero,
1482 "one" => FillNullStrategy::One,
1483 e => {
1484 return Err(PyValueError::new_err(format!(
1485 "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1486 )));
1487 },
1488 };
1489 Ok(parsed)
1490}
1491
/// Maps a parquet `compression` name and optional level onto a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `'gzip'`, `'brotli'` and `'zstd'`; when it is
/// `None` the codec receives no explicit level.
///
/// # Errors
///
/// Returns a `ValueError` when `compression` is not a recognised name, when the level does
/// not fit the integer type the codec expects (previously such values were silently wrapped
/// by an `as` cast, e.g. gzip level 257 became 1), or when the codec's own level validation
/// rejects it.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked narrowing: reject values outside 0..=255 instead of wrapping.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "gzip `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: reject negative values instead of wrapping.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "brotli `compression_level` is out of range, got {lvl}"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1533
1534pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1535where
1536 I: IntoIterator<Item = S>,
1537 S: AsRef<str>,
1538{
1539 container
1540 .into_iter()
1541 .map(|s| PlSmallStr::from_str(s.as_ref()))
1542 .collect()
1543}
1544
1545#[derive(Debug, Copy, Clone)]
1546pub struct PyCompatLevel(pub CompatLevel);
1547
1548impl<'py> FromPyObject<'py> for PyCompatLevel {
1549 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1550 Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1551 if let Ok(compat_level) = CompatLevel::with_level(level) {
1552 compat_level
1553 } else {
1554 return Err(PyValueError::new_err("invalid compat level"));
1555 }
1556 } else if let Ok(future) = ob.extract::<bool>() {
1557 if future {
1558 CompatLevel::newest()
1559 } else {
1560 CompatLevel::oldest()
1561 }
1562 } else {
1563 return Err(PyTypeError::new_err(
1564 "'compat_level' argument accepts int or bool",
1565 ));
1566 }))
1567 }
1568}
1569
/// Parses the Python-side `form` string into a [`UnicodeForm`].
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    /// Accepts `'NFC'`, `'NFKC'`, `'NFD'` or `'NFKD'` (case-sensitive match).
    /// Any other string produces a `ValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let form = match &*name {
            "NFC" => Ok(UnicodeForm::NFC),
            "NFKC" => Ok(UnicodeForm::NFKC),
            "NFD" => Ok(UnicodeForm::NFD),
            "NFKD" => Ok(UnicodeForm::NFKD),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        };
        form.map(Wrap)
    }
}
1587
/// Extracts optional parquet key/value metadata from a Python object.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    /// `None` yields `Wrap(None)`. A value that extracts as a list of `(str, str)` pairs is
    /// passed to `KeyValueMetadata::from_static`; a value captured as an arbitrary Python
    /// object is passed to `KeyValueMetadata::from_py_function`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Local helper describing the two accepted shapes of the metadata argument.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let key_value_metadata = match Option::<Metadata>::extract_bound(ob)? {
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
            None => None,
        };
        Ok(Wrap(key_value_metadata))
    }
}
1605
1606impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1607 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1608 let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1609
1610 let tz = tz.map(|x| x.0);
1611
1612 Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1613 }
1614}
1615
1616impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1617 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1618 let parsed = match &*ob.extract::<PyBackedStr>()? {
1619 "upcast" => UpcastOrForbid::Upcast,
1620 "forbid" => UpcastOrForbid::Forbid,
1621 v => {
1622 return Err(PyValueError::new_err(format!(
1623 "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1624 )));
1625 },
1626 };
1627 Ok(Wrap(parsed))
1628 }
1629}
1630
1631impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1632 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1633 let parsed = match &*ob.extract::<PyBackedStr>()? {
1634 "ignore" => ExtraColumnsPolicy::Ignore,
1635 "raise" => ExtraColumnsPolicy::Raise,
1636 v => {
1637 return Err(PyValueError::new_err(format!(
1638 "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1639 )));
1640 },
1641 };
1642 Ok(Wrap(parsed))
1643 }
1644}
1645
1646impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1647 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1648 let parsed = match &*ob.extract::<PyBackedStr>()? {
1649 "insert" => MissingColumnsPolicy::Insert,
1650 "raise" => MissingColumnsPolicy::Raise,
1651 v => {
1652 return Err(PyValueError::new_err(format!(
1653 "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1654 )));
1655 },
1656 };
1657 Ok(Wrap(parsed))
1658 }
1659}
1660
1661impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1662 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1663 if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1664 return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1665 }
1666
1667 let parsed = match &*ob.extract::<PyBackedStr>()? {
1668 "insert" => MissingColumnsPolicyOrExpr::Insert,
1669 "raise" => MissingColumnsPolicyOrExpr::Raise,
1670 v => {
1671 return Err(PyValueError::new_err(format!(
1672 "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1673 )));
1674 },
1675 };
1676 Ok(Wrap(parsed))
1677 }
1678}
1679
1680impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1681 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1682 let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1683
1684 Ok(Wrap(match &*column_mapping_type {
1685 "iceberg-column-mapping" => {
1686 let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1687 ColumnMapping::Iceberg(Arc::new(
1688 IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1689 ))
1690 },
1691
1692 v => {
1693 return Err(PyValueError::new_err(format!(
1694 "unknown column mapping type: {v}"
1695 )));
1696 },
1697 }))
1698 }
1699}
1700
1701impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1702 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1703 let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1704
1705 Ok(Wrap(match &*deletion_file_type {
1706 "iceberg-position-delete" => {
1707 let dict: Bound<'_, PyDict> = ob.extract()?;
1708
1709 let mut out = PlIndexMap::new();
1710
1711 for (k, v) in dict
1712 .try_iter()?
1713 .zip(dict.call_method0("values")?.try_iter()?)
1714 {
1715 let k: usize = k?.extract()?;
1716 let v: Bound<'_, PyAny> = v?.extract()?;
1717
1718 let files = v
1719 .try_iter()?
1720 .map(|x| {
1721 x.and_then(|x| {
1722 let x: String = x.extract()?;
1723 Ok(x)
1724 })
1725 })
1726 .collect::<PyResult<Arc<[String]>>>()?;
1727
1728 if !files.is_empty() {
1729 out.insert(k, files);
1730 }
1731 }
1732
1733 DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1734 },
1735
1736 v => {
1737 return Err(PyValueError::new_err(format!(
1738 "unknown deletion file type: {v}"
1739 )));
1740 },
1741 }))
1742 }
1743}
1744
1745impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
1746 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1747 let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1748
1749 Ok(Wrap(match &*default_values_type {
1750 "iceberg" => {
1751 let dict: Bound<'_, PyDict> = ob.extract()?;
1752
1753 let mut out = PlIndexMap::new();
1754
1755 for (k, v) in dict
1756 .try_iter()?
1757 .zip(dict.call_method0("values")?.try_iter()?)
1758 {
1759 let k: u32 = k?.extract()?;
1760 let v = v?;
1761
1762 let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1763 Ok(s.into_column())
1764 } else {
1765 let err_msg: String = v.extract()?;
1766 Err(err_msg)
1767 };
1768
1769 out.insert(k, v);
1770 }
1771
1772 DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1773 out,
1774 )))
1775 },
1776
1777 v => {
1778 return Err(PyValueError::new_err(format!(
1779 "unknown deletion file type: {v}"
1780 )));
1781 },
1782 }))
1783 }
1784}
1785
1786impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1787 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1788 if let Ok(path) = ob.extract::<PyBackedStr>() {
1789 Ok(Wrap(PlPath::new(&path)))
1790 } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1791 Ok(Wrap(PlPath::Local(path.into())))
1792 } else {
1793 Err(
1794 PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1795 .into(),
1796 )
1797 }
1798 }
1799}
1800
1801impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1802 type Target = PyString;
1803 type Output = Bound<'py, Self::Target>;
1804 type Error = Infallible;
1805
1806 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1807 self.0.to_str().into_pyobject(py)
1808 }
1809}