1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::default_values::{
21 DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
22};
23use polars::prelude::deletion::DeletionFilesList;
24use polars::series::ops::NullBehavior;
25use polars_core::schema::iceberg::IcebergSchema;
26use polars_core::utils::arrow::array::Array;
27use polars_core::utils::arrow::types::NativeType;
28use polars_core::utils::materialize_dyn_int;
29use polars_lazy::prelude::*;
30#[cfg(feature = "parquet")]
31use polars_parquet::write::StatisticsOptions;
32use polars_plan::dsl::ScanSources;
33use polars_utils::mmap::MemSlice;
34use polars_utils::pl_str::PlSmallStr;
35use polars_utils::total_ord::{TotalEq, TotalHash};
36use pyo3::basic::CompareOp;
37use pyo3::exceptions::{PyTypeError, PyValueError};
38use pyo3::intern;
39use pyo3::prelude::*;
40use pyo3::pybacked::PyBackedStr;
41use pyo3::sync::GILOnceCell;
42use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
43
44use crate::error::PyPolarsErr;
45use crate::expr::PyExpr;
46use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
47use crate::interop::arrow::to_rust::field_to_rust_arrow;
48#[cfg(feature = "object")]
49use crate::object::OBJECT_NAME;
50use crate::prelude::*;
51use crate::py_modules::{pl_series, polars};
52use crate::series::PySeries;
53use crate::utils::to_py_err;
54use crate::{PyDataFrame, PyLazyFrame};
55
/// Marker for wrapper types whose values can be reinterpreted as their `Target`.
///
/// # Safety
///
/// [`reinterpret_vec`] casts the buffer of a `Vec<Self>` to a `Vec<Self::Target>` after
/// checking only size and alignment, so implementors must guarantee that `Self` and
/// `Self::Target` have the same in-memory representation.
pub(crate) unsafe trait Transparent {
    type Target;
}

// NOTE(review): relies on `PySeries` having the same layout as `Series`; the definition of
// `PySeries` is not visible from this file section — confirm.
unsafe impl Transparent for PySeries {
    type Target = Series;
}

// `Wrap<T>` is declared `#[repr(transparent)]` over `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}

// NOTE(review): assumes `Option<T>` and `Option<T::Target>` share a layout whenever `T` and
// `T::Target` do — confirm.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
73
74pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
75 assert_eq!(size_of::<T>(), size_of::<T::Target>());
76 assert_eq!(align_of::<T>(), align_of::<T::Target>());
77 let len = input.len();
78 let cap = input.capacity();
79 let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
80 let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
81 let ptr: *mut T::Target = vec_ptr as *mut T::Target;
82 unsafe { Vec::from_raw_parts(ptr, len, cap) }
83}
84
85pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
86 reinterpret_vec(buf)
87}
88
/// Transparent newtype around `T`, used to attach trait implementations to types that are
/// not defined in this module.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);

impl<T: Clone> Clone for Wrap<T> {
    /// Clones the wrapped value and re-wraps it.
    fn clone(&self) -> Self {
        let Wrap(inner) = self;
        Self(inner.clone())
    }
}

impl<T> From<T> for Wrap<T> {
    /// Wraps `t` without any conversion.
    fn from(t: T) -> Self {
        Self(t)
    }
}
106
107pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
109 let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
110 Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
111}
112
113pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
114 let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
115 Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
116}
117
118pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
119 let s = obj.getattr(intern!(obj.py(), "_s"))?;
120 Ok(s.extract::<PySeries>()?.series.into_inner())
121}
122
123pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
124 let series = pl_series(py).bind(py);
125 let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
126 constructor.call1((s,))
127}
128
129impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
130 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
131 Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
132 }
133}
134
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    /// Accepts, in this order of precedence: a single string, a list of strings, or a list
    /// of `(str, str)` pairs.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // A single string applies to all columns.
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        // A list of strings applies to all columns.
        if let Ok(many) = ob.extract::<Vec<PyBackedStr>>() {
            let values = many.into_iter().map(|x| (&*x).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        // A list of string pairs becomes `NullValues::Named`.
        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let named = pairs
                .into_iter()
                .map(|(a, b)| ((&*a).into(), (&*b).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(named)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
158
159fn struct_dict<'a, 'py>(
160 py: Python<'py>,
161 vals: impl Iterator<Item = AnyValue<'a>>,
162 flds: &[Field],
163) -> PyResult<Bound<'py, PyDict>> {
164 let dict = PyDict::new(py);
165 flds.iter().zip(vals).try_for_each(|(fld, val)| {
166 dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
167 })?;
168 Ok(dict)
169}
170
/// Writes the decimal representation of `v` into `buf` and returns the number of
/// characters written.
///
/// `v` is formatted with `itoa`, its ASCII bytes are copied to the start of `buf` (viewed
/// as 48 bytes), and `ZEROS` is then subtracted from each of the three 128-bit words.
///
/// NOTE(review): the subtraction is word-wise, not byte-wise, so any byte smaller than
/// `0x30` (unwritten bytes, or a leading `'-'`) borrows from neighbouring bytes.
/// Presumably callers pass non-negative values and only read the first `len` bytes — confirm.
fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
    // `0x30` (ASCII '0') repeated in all 16 bytes of a 128-bit word.
    const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
    // SAFETY: `[u128; 3]` is exactly 48 bytes and `u8` has no alignment requirement, so the
    // same memory can be viewed as `[u8; 48]`.
    let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
    let mut buffer = itoa::Buffer::new();
    let value = buffer.format(v);
    let len = value.len();
    // Copy the formatted characters to the front of the byte view.
    for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
        *dst = *src
    }

    let ptr = buf.as_mut_ptr() as *mut i128;
    // SAFETY: `ptr` is the start of the original `[u128; 3]`, so it is suitably aligned
    // for `i128` and word offsets 0, 1 and 2 are in bounds.
    unsafe {
        *ptr -= ZEROS;
        *ptr.add(1) -= ZEROS;
        *ptr.add(2) -= ZEROS;
    }
    len
}
193
194impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
195 type Target = PyAny;
196 type Output = Bound<'py, Self::Target>;
197 type Error = PyErr;
198
199 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
200 let pl = polars(py).bind(py);
201
202 match &self.0 {
203 DataType::Int8 => {
204 let class = pl.getattr(intern!(py, "Int8"))?;
205 class.call0()
206 },
207 DataType::Int16 => {
208 let class = pl.getattr(intern!(py, "Int16"))?;
209 class.call0()
210 },
211 DataType::Int32 => {
212 let class = pl.getattr(intern!(py, "Int32"))?;
213 class.call0()
214 },
215 DataType::Int64 => {
216 let class = pl.getattr(intern!(py, "Int64"))?;
217 class.call0()
218 },
219 DataType::UInt8 => {
220 let class = pl.getattr(intern!(py, "UInt8"))?;
221 class.call0()
222 },
223 DataType::UInt16 => {
224 let class = pl.getattr(intern!(py, "UInt16"))?;
225 class.call0()
226 },
227 DataType::UInt32 => {
228 let class = pl.getattr(intern!(py, "UInt32"))?;
229 class.call0()
230 },
231 DataType::UInt64 => {
232 let class = pl.getattr(intern!(py, "UInt64"))?;
233 class.call0()
234 },
235 DataType::Int128 => {
236 let class = pl.getattr(intern!(py, "Int128"))?;
237 class.call0()
238 },
239 DataType::Float32 => {
240 let class = pl.getattr(intern!(py, "Float32"))?;
241 class.call0()
242 },
243 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
244 let class = pl.getattr(intern!(py, "Float64"))?;
245 class.call0()
246 },
247 DataType::Decimal(precision, scale) => {
248 let class = pl.getattr(intern!(py, "Decimal"))?;
249 let args = (*precision, *scale);
250 class.call1(args)
251 },
252 DataType::Boolean => {
253 let class = pl.getattr(intern!(py, "Boolean"))?;
254 class.call0()
255 },
256 DataType::String | DataType::Unknown(UnknownKind::Str) => {
257 let class = pl.getattr(intern!(py, "String"))?;
258 class.call0()
259 },
260 DataType::Binary => {
261 let class = pl.getattr(intern!(py, "Binary"))?;
262 class.call0()
263 },
264 DataType::Array(inner, size) => {
265 let class = pl.getattr(intern!(py, "Array"))?;
266 let inner = Wrap(*inner.clone());
267 let args = (&inner, *size);
268 class.call1(args)
269 },
270 DataType::List(inner) => {
271 let class = pl.getattr(intern!(py, "List"))?;
272 let inner = Wrap(*inner.clone());
273 class.call1((&inner,))
274 },
275 DataType::Date => {
276 let class = pl.getattr(intern!(py, "Date"))?;
277 class.call0()
278 },
279 DataType::Datetime(tu, tz) => {
280 let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
281 datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
282 },
283 DataType::Duration(tu) => {
284 let duration_class = pl.getattr(intern!(py, "Duration"))?;
285 duration_class.call1((tu.to_ascii(),))
286 },
287 #[cfg(feature = "object")]
288 DataType::Object(_) => {
289 let class = pl.getattr(intern!(py, "Object"))?;
290 class.call0()
291 },
292 DataType::Categorical(cats, _) => {
293 let categories_class = pl.getattr(intern!(py, "Categories"))?;
294 let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
295 let categories = categories_class
296 .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
297 let kwargs = [("categories", categories)];
298 categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
299 },
300 DataType::Enum(_, mapping) => {
301 let categories = unsafe {
302 StringChunked::from_chunks(
303 PlSmallStr::from_static("category"),
304 vec![mapping.to_arrow(true)],
305 )
306 };
307 let class = pl.getattr(intern!(py, "Enum"))?;
308 let series = to_series(py, categories.into_series().into())?;
309 class.call1((series,))
310 },
311 DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
312 DataType::Struct(fields) => {
313 let field_class = pl.getattr(intern!(py, "Field"))?;
314 let iter = fields.iter().map(|fld| {
315 let name = fld.name().as_str();
316 let dtype = Wrap(fld.dtype().clone());
317 field_class.call1((name, &dtype)).unwrap()
318 });
319 let fields = PyList::new(py, iter)?;
320 let struct_class = pl.getattr(intern!(py, "Struct"))?;
321 struct_class.call1((fields,))
322 },
323 DataType::Null => {
324 let class = pl.getattr(intern!(py, "Null"))?;
325 class.call0()
326 },
327 DataType::Unknown(UnknownKind::Int(v)) => {
328 Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
329 },
330 DataType::Unknown(_) => {
331 let class = pl.getattr(intern!(py, "Unknown"))?;
332 class.call0()
333 },
334 DataType::BinaryOffset => {
335 unimplemented!()
336 },
337 }
338 }
339}
340
341impl<'py> FromPyObject<'py> for Wrap<Field> {
342 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
343 let py = ob.py();
344 let name = ob
345 .getattr(intern!(py, "name"))?
346 .str()?
347 .extract::<PyBackedStr>()?;
348 let dtype = ob
349 .getattr(intern!(py, "dtype"))?
350 .extract::<Wrap<DataType>>()?;
351 Ok(Wrap(Field::new((&*name).into(), dtype.0)))
352 }
353}
354
355impl<'py> FromPyObject<'py> for Wrap<DataType> {
356 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
357 let py = ob.py();
358 let type_name = ob.get_type().qualname()?.to_string();
359
360 let dtype = match &*type_name {
361 "DataTypeClass" => {
362 let name = ob
364 .getattr(intern!(py, "__name__"))?
365 .str()?
366 .extract::<PyBackedStr>()?;
367 match &*name {
368 "Int8" => DataType::Int8,
369 "Int16" => DataType::Int16,
370 "Int32" => DataType::Int32,
371 "Int64" => DataType::Int64,
372 "Int128" => DataType::Int128,
373 "UInt8" => DataType::UInt8,
374 "UInt16" => DataType::UInt16,
375 "UInt32" => DataType::UInt32,
376 "UInt64" => DataType::UInt64,
377 "Float32" => DataType::Float32,
378 "Float64" => DataType::Float64,
379 "Boolean" => DataType::Boolean,
380 "String" => DataType::String,
381 "Binary" => DataType::Binary,
382 "Categorical" => DataType::from_categories(Categories::global()),
383 "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
384 "Date" => DataType::Date,
385 "Time" => DataType::Time,
386 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
387 "Duration" => DataType::Duration(TimeUnit::Microseconds),
388 "Decimal" => DataType::Decimal(None, None), "List" => DataType::List(Box::new(DataType::Null)),
390 "Array" => DataType::Array(Box::new(DataType::Null), 0),
391 "Struct" => DataType::Struct(vec![]),
392 "Null" => DataType::Null,
393 #[cfg(feature = "object")]
394 "Object" => DataType::Object(OBJECT_NAME),
395 "Unknown" => DataType::Unknown(Default::default()),
396 dt => {
397 return Err(PyTypeError::new_err(format!(
398 "'{dt}' is not a Polars data type",
399 )));
400 },
401 }
402 },
403 "Int8" => DataType::Int8,
404 "Int16" => DataType::Int16,
405 "Int32" => DataType::Int32,
406 "Int64" => DataType::Int64,
407 "Int128" => DataType::Int128,
408 "UInt8" => DataType::UInt8,
409 "UInt16" => DataType::UInt16,
410 "UInt32" => DataType::UInt32,
411 "UInt64" => DataType::UInt64,
412 "Float32" => DataType::Float32,
413 "Float64" => DataType::Float64,
414 "Boolean" => DataType::Boolean,
415 "String" => DataType::String,
416 "Binary" => DataType::Binary,
417 "Categorical" => {
418 let categories = ob.getattr(intern!(py, "categories")).unwrap();
419 let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
420 let py_categories = py_categories.extract::<PyCategories>()?;
421 DataType::from_categories(py_categories.categories().clone())
422 },
423 "Enum" => {
424 let categories = ob.getattr(intern!(py, "categories")).unwrap();
425 let s = get_series(&categories.as_borrowed())?;
426 let ca = s.str().map_err(PyPolarsErr::from)?;
427 let categories = ca.downcast_iter().next().unwrap().clone();
428 assert!(!categories.has_nulls());
429 DataType::from_frozen_categories(
430 FrozenCategories::new(categories.values_iter()).unwrap(),
431 )
432 },
433 "Date" => DataType::Date,
434 "Time" => DataType::Time,
435 "Datetime" => {
436 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
437 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
438 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
439 let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
440 DataType::Datetime(
441 time_unit,
442 TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
443 )
444 },
445 "Duration" => {
446 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
447 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
448 DataType::Duration(time_unit)
449 },
450 "Decimal" => {
451 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
452 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
453 DataType::Decimal(precision, Some(scale))
454 },
455 "List" => {
456 let inner = ob.getattr(intern!(py, "inner")).unwrap();
457 let inner = inner.extract::<Wrap<DataType>>()?;
458 DataType::List(Box::new(inner.0))
459 },
460 "Array" => {
461 let inner = ob.getattr(intern!(py, "inner")).unwrap();
462 let size = ob.getattr(intern!(py, "size")).unwrap();
463 let inner = inner.extract::<Wrap<DataType>>()?;
464 let size = size.extract::<usize>()?;
465 DataType::Array(Box::new(inner.0), size)
466 },
467 "Struct" => {
468 let fields = ob.getattr(intern!(py, "fields"))?;
469 let fields = fields
470 .extract::<Vec<Wrap<Field>>>()?
471 .into_iter()
472 .map(|f| f.0)
473 .collect::<Vec<Field>>();
474 DataType::Struct(fields)
475 },
476 "Null" => DataType::Null,
477 #[cfg(feature = "object")]
478 "Object" => DataType::Object(OBJECT_NAME),
479 "Unknown" => DataType::Unknown(Default::default()),
480 dt => {
481 return Err(PyTypeError::new_err(format!(
482 "'{dt}' is not a Polars data type",
483 )));
484 },
485 };
486 Ok(Wrap(dtype))
487 }
488}
489
/// Ordering accepted for categorical data at the Python boundary.
///
/// `Lexical` is the only variant; the `FromPyObject` impl in this file also accepts the
/// string `"physical"` and maps it to `Lexical` with a deprecation warning.
enum CategoricalOrdering {
    Lexical,
}
493
494impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
495 type Target = PyString;
496 type Output = Bound<'py, Self::Target>;
497 type Error = Infallible;
498
499 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
500 "lexical".into_pyobject(py)
501 }
502}
503
504impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
505 type Target = PyString;
506 type Output = Bound<'py, Self::Target>;
507 type Error = Infallible;
508
509 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
510 self.0.to_ascii().into_pyobject(py)
511 }
512}
513
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    /// Builds `StatisticsOptions` from a Python dict of `str -> bool`, starting from
    /// `StatisticsOptions::empty()`.
    ///
    /// Recognised keys are `"min"`, `"max"`, `"distinct_count"` and `"null_count"`; any
    /// other key yields a `PyTypeError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut options = StatisticsOptions::empty();

        for (key, val) in ob.downcast::<PyDict>()?.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Resolve the flag the key refers to, then assign once.
            let flag = match key.as_ref() {
                "min" => &mut options.min_value,
                "max" => &mut options.max_value,
                "distinct_count" => &mut options.distinct_count,
                "null_count" => &mut options.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(options))
    }
}
540
541impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
542 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
543 let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
544 let vals = reinterpret_vec(vals);
545 Ok(Wrap(Row(vals)))
546 }
547}
548
549impl<'py> FromPyObject<'py> for Wrap<Schema> {
550 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
551 let dict = ob.downcast::<PyDict>()?;
552
553 Ok(Wrap(
554 dict.iter()
555 .map(|(key, val)| {
556 let key = key.extract::<PyBackedStr>()?;
557 let val = val.extract::<Wrap<DataType>>()?;
558
559 Ok(Field::new((&*key).into(), val.0))
560 })
561 .collect::<PyResult<Schema>>()?,
562 ))
563 }
564}
565
566impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
567 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
568 let py = ob.py();
569
570 let pyarrow_schema_cls = py
571 .import(intern!(py, "pyarrow"))?
572 .getattr(intern!(py, "Schema"))?;
573
574 if ob.is_none() {
575 return Err(PyValueError::new_err("arrow_schema() returned None").into());
576 }
577
578 let schema_cls = ob.getattr(intern!(py, "__class__"))?;
579
580 if !schema_cls.is(&pyarrow_schema_cls) {
581 return Err(PyTypeError::new_err(format!(
582 "expected pyarrow.Schema, got: {schema_cls}"
583 )));
584 }
585
586 let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
587
588 let mut last_err = None;
589
590 let schema =
591 ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
592 Some(Ok(v)) => Some(v),
593 Some(Err(e)) => {
594 last_err = Some(e);
595 None
596 },
597 None => None,
598 }))
599 .map_err(to_py_err)?;
600
601 if let Some(last_err) = last_err {
602 return Err(last_err.into());
603 }
604
605 Ok(Wrap(schema))
606 }
607}
608
609impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
610 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
611 let list = ob.downcast::<PyList>()?.to_owned();
612
613 if list.is_empty() {
614 return Ok(Wrap(ScanSources::default()));
615 }
616
617 enum MutableSources {
618 Paths(Vec<PlPath>),
619 Files(Vec<File>),
620 Buffers(Vec<MemSlice>),
621 }
622
623 let num_items = list.len();
624 let mut iter = list
625 .into_iter()
626 .map(|val| get_python_scan_source_input(val.unbind(), false));
627
628 let Some(first) = iter.next() else {
629 return Ok(Wrap(ScanSources::default()));
630 };
631
632 let mut sources = match first? {
633 PythonScanSourceInput::Path(path) => {
634 let mut sources = Vec::with_capacity(num_items);
635 sources.push(path);
636 MutableSources::Paths(sources)
637 },
638 PythonScanSourceInput::File(file) => {
639 let mut sources = Vec::with_capacity(num_items);
640 sources.push(file.into());
641 MutableSources::Files(sources)
642 },
643 PythonScanSourceInput::Buffer(buffer) => {
644 let mut sources = Vec::with_capacity(num_items);
645 sources.push(buffer);
646 MutableSources::Buffers(sources)
647 },
648 };
649
650 for source in iter {
651 match (&mut sources, source?) {
652 (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
653 (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
654 (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
655 _ => {
656 return Err(PyTypeError::new_err(
657 "Cannot combine in-memory bytes, paths and files for scan sources",
658 ));
659 },
660 }
661 }
662
663 Ok(Wrap(match sources {
664 MutableSources::Paths(i) => ScanSources::Paths(i.into()),
665 MutableSources::Files(i) => ScanSources::Files(i.into()),
666 MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
667 }))
668 }
669}
670
671impl<'py> IntoPyObject<'py> for Wrap<Schema> {
672 type Target = PyDict;
673 type Output = Bound<'py, Self::Target>;
674 type Error = PyErr;
675
676 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
677 let dict = PyDict::new(py);
678 self.0
679 .iter()
680 .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
681 Ok(dict)
682 }
683}
684
/// A Python object held as a single value.
///
/// The struct is `#[repr(transparent)]` over its only field, the owned `PyObject`.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The wrapped Python object.
    pub inner: PyObject,
}
690
691impl Clone for ObjectValue {
692 fn clone(&self) -> Self {
693 Python::with_gil(|py| Self {
694 inner: self.inner.clone_ref(py),
695 })
696 }
697}
698
699impl Hash for ObjectValue {
700 fn hash<H: Hasher>(&self, state: &mut H) {
701 let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
702 state.write_isize(h)
703 }
704}
705
706impl Eq for ObjectValue {}
707
708impl PartialEq for ObjectValue {
709 fn eq(&self, other: &Self) -> bool {
710 Python::with_gil(|py| {
711 match self
712 .inner
713 .bind(py)
714 .rich_compare(other.inner.bind(py), CompareOp::Eq)
715 {
716 Ok(result) => result.is_truthy().unwrap(),
717 Err(_) => false,
718 }
719 })
720 }
721}
722
723impl TotalEq for ObjectValue {
724 fn tot_eq(&self, other: &Self) -> bool {
725 self == other
726 }
727}
728
729impl TotalHash for ObjectValue {
730 fn tot_hash<H>(&self, state: &mut H)
731 where
732 H: Hasher,
733 {
734 self.hash(state);
735 }
736}
737
738impl Display for ObjectValue {
739 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
740 write!(f, "{}", self.inner)
741 }
742}
743
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// Returns the type name `"object"`.
    fn type_name() -> &'static str {
        "object"
    }
}
750
751impl From<PyObject> for ObjectValue {
752 fn from(p: PyObject) -> Self {
753 Self { inner: p }
754 }
755}
756
757impl<'py> FromPyObject<'py> for ObjectValue {
758 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
759 Ok(ObjectValue {
760 inner: ob.to_owned().unbind(),
761 })
762 }
763}
764
/// Reinterprets a `PolarsObjectSafe` trait object as an `ObjectValue` reference.
///
/// NOTE(review): the cast is unchecked. It is only sound if every trait object passed
/// here is actually backed by an `ObjectValue` — confirm at the call sites.
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // SAFETY: relies on the concrete type behind `val` being `ObjectValue`; the cast
        // drops the vtable and keeps only the data pointer.
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
774
775impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
776 type Target = PyAny;
777 type Output = Borrowed<'a, 'py, Self::Target>;
778 type Error = std::convert::Infallible;
779
780 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
781 Ok(self.inner.bind_borrowed(py))
782 }
783}
784
785impl Default for ObjectValue {
786 fn default() -> Self {
787 Python::with_gil(|py| ObjectValue { inner: py.None() })
788 }
789}
790
791impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
792 fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
793 let seq = obj.downcast::<PySequence>()?;
794 let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
795 for item in seq.try_iter()? {
796 v.push(item?.extract::<T>()?);
797 }
798 Ok(Wrap(v))
799 }
800}
801
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    /// Parses `"backward"`, `"forward"` or `"nearest"`; any other string is a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "backward" => AsofStrategy::Backward,
            "forward" => AsofStrategy::Forward,
            "nearest" => AsofStrategy::Nearest,
            v => {
                return Err(PyValueError::new_err(format!(
                    "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
                )));
            },
        }))
    }
}
818
819impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
820 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
821 let parsed = match &*(ob.extract::<PyBackedStr>()?) {
822 "linear" => InterpolationMethod::Linear,
823 "nearest" => InterpolationMethod::Nearest,
824 v => {
825 return Err(PyValueError::new_err(format!(
826 "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
827 )));
828 },
829 };
830 Ok(Wrap(parsed))
831 }
832}
833
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    /// Parses `"uncompressed"` (as `None`), `"snappy"` or `"deflate"`; any other string is
    /// a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "uncompressed" => None,
            "snappy" => Some(AvroCompression::Snappy),
            "deflate" => Some(AvroCompression::Deflate),
            v => {
                return Err(PyValueError::new_err(format!(
                    "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
                )));
            },
        }))
    }
}
850
851impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
852 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
853 let parsed = match &*ob.extract::<PyBackedStr>()? {
854 "lexical" => CategoricalOrdering::Lexical,
855 "physical" => {
856 polars_warn!(
857 Deprecation,
858 "physical ordering is deprecated, will use lexical ordering instead"
859 );
860 CategoricalOrdering::Lexical
861 },
862 v => {
863 return Err(PyValueError::new_err(format!(
864 "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
865 )));
866 },
867 };
868 Ok(Wrap(parsed))
869 }
870}
871
872impl<'py> FromPyObject<'py> for Wrap<StartBy> {
873 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
874 let parsed = match &*ob.extract::<PyBackedStr>()? {
875 "window" => StartBy::WindowBound,
876 "datapoint" => StartBy::DataPoint,
877 "monday" => StartBy::Monday,
878 "tuesday" => StartBy::Tuesday,
879 "wednesday" => StartBy::Wednesday,
880 "thursday" => StartBy::Thursday,
881 "friday" => StartBy::Friday,
882 "saturday" => StartBy::Saturday,
883 "sunday" => StartBy::Sunday,
884 v => {
885 return Err(PyValueError::new_err(format!(
886 "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
887 )));
888 },
889 };
890 Ok(Wrap(parsed))
891 }
892}
893
894impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
895 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
896 let parsed = match &*ob.extract::<PyBackedStr>()? {
897 "left" => ClosedWindow::Left,
898 "right" => ClosedWindow::Right,
899 "both" => ClosedWindow::Both,
900 "none" => ClosedWindow::None,
901 v => {
902 return Err(PyValueError::new_err(format!(
903 "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
904 )));
905 },
906 };
907 Ok(Wrap(parsed))
908 }
909}
910
911impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
912 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
913 let parsed = match &*ob.extract::<PyBackedStr>()? {
914 "half_to_even" => RoundMode::HalfToEven,
915 "half_away_from_zero" => RoundMode::HalfAwayFromZero,
916 v => {
917 return Err(PyValueError::new_err(format!(
918 "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
919 )));
920 },
921 };
922 Ok(Wrap(parsed))
923 }
924}
925
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    /// Parses `"utf8"` or `"utf8-lossy"`; any other string is a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "utf8" => CsvEncoding::Utf8,
            "utf8-lossy" => CsvEncoding::LossyUtf8,
            v => {
                return Err(PyValueError::new_err(format!(
                    "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
                )));
            },
        }))
    }
}
941
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    /// Parses `"uncompressed"` (as `None`), `"lz4"` or `"zstd"`; any other string is a
    /// `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "uncompressed" => None,
            "lz4" => Some(IpcCompression::LZ4),
            "zstd" => Some(IpcCompression::ZSTD),
            v => {
                return Err(PyValueError::new_err(format!(
                    "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
                )));
            },
        }))
    }
}
958
959impl<'py> FromPyObject<'py> for Wrap<JoinType> {
960 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
961 let parsed = match &*ob.extract::<PyBackedStr>()? {
962 "inner" => JoinType::Inner,
963 "left" => JoinType::Left,
964 "right" => JoinType::Right,
965 "full" => JoinType::Full,
966 "semi" => JoinType::Semi,
967 "anti" => JoinType::Anti,
968 #[cfg(feature = "cross_join")]
969 "cross" => JoinType::Cross,
970 v => {
971 return Err(PyValueError::new_err(format!(
972 "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
973 )));
974 },
975 };
976 Ok(Wrap(parsed))
977 }
978}
979
980impl<'py> FromPyObject<'py> for Wrap<Label> {
981 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
982 let parsed = match &*ob.extract::<PyBackedStr>()? {
983 "left" => Label::Left,
984 "right" => Label::Right,
985 "datapoint" => Label::DataPoint,
986 v => {
987 return Err(PyValueError::new_err(format!(
988 "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
989 )));
990 },
991 };
992 Ok(Wrap(parsed))
993 }
994}
995
996impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
997 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
998 let parsed = match &*ob.extract::<PyBackedStr>()? {
999 "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
1000 "max_width" => ListToStructWidthStrategy::MaxWidth,
1001 v => {
1002 return Err(PyValueError::new_err(format!(
1003 "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1004 )));
1005 },
1006 };
1007 Ok(Wrap(parsed))
1008 }
1009}
1010
1011impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1012 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1013 let parsed = match &*ob.extract::<PyBackedStr>()? {
1014 "null" => NonExistent::Null,
1015 "raise" => NonExistent::Raise,
1016 v => {
1017 return Err(PyValueError::new_err(format!(
1018 "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1019 )));
1020 },
1021 };
1022 Ok(Wrap(parsed))
1023 }
1024}
1025
1026impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1027 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1028 let parsed = match &*ob.extract::<PyBackedStr>()? {
1029 "drop" => NullBehavior::Drop,
1030 "ignore" => NullBehavior::Ignore,
1031 v => {
1032 return Err(PyValueError::new_err(format!(
1033 "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1034 )));
1035 },
1036 };
1037 Ok(Wrap(parsed))
1038 }
1039}
1040
1041impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1042 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1043 let parsed = match &*ob.extract::<PyBackedStr>()? {
1044 "ignore" => NullStrategy::Ignore,
1045 "propagate" => NullStrategy::Propagate,
1046 v => {
1047 return Err(PyValueError::new_err(format!(
1048 "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1049 )));
1050 },
1051 };
1052 Ok(Wrap(parsed))
1053 }
1054}
1055
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    /// Parses `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"` or `"none"`; any other
    /// string is a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                )));
            },
        }))
    }
}
1074
1075impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1076 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1077 let parsed = match &*ob.extract::<PyBackedStr>()? {
1078 "fortran" => IndexOrder::Fortran,
1079 "c" => IndexOrder::C,
1080 v => {
1081 return Err(PyValueError::new_err(format!(
1082 "`order` must be one of {{'fortran', 'c'}}, got {v}",
1083 )));
1084 },
1085 };
1086 Ok(Wrap(parsed))
1087 }
1088}
1089
1090impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1091 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1092 let parsed = match &*ob.extract::<PyBackedStr>()? {
1093 "lower" => QuantileMethod::Lower,
1094 "higher" => QuantileMethod::Higher,
1095 "nearest" => QuantileMethod::Nearest,
1096 "linear" => QuantileMethod::Linear,
1097 "midpoint" => QuantileMethod::Midpoint,
1098 "equiprobable" => QuantileMethod::Equiprobable,
1099 v => {
1100 return Err(PyValueError::new_err(format!(
1101 "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1102 )));
1103 },
1104 };
1105 Ok(Wrap(parsed))
1106 }
1107}
1108
1109impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1110 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1111 let parsed = match &*ob.extract::<PyBackedStr>()? {
1112 "min" => RankMethod::Min,
1113 "max" => RankMethod::Max,
1114 "average" => RankMethod::Average,
1115 "dense" => RankMethod::Dense,
1116 "ordinal" => RankMethod::Ordinal,
1117 "random" => RankMethod::Random,
1118 v => {
1119 return Err(PyValueError::new_err(format!(
1120 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1121 )));
1122 },
1123 };
1124 Ok(Wrap(parsed))
1125 }
1126}
1127
1128impl<'py> FromPyObject<'py> for Wrap<Roll> {
1129 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1130 let parsed = match &*ob.extract::<PyBackedStr>()? {
1131 "raise" => Roll::Raise,
1132 "forward" => Roll::Forward,
1133 "backward" => Roll::Backward,
1134 v => {
1135 return Err(PyValueError::new_err(format!(
1136 "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1137 )));
1138 },
1139 };
1140 Ok(Wrap(parsed))
1141 }
1142}
1143
1144impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1145 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1146 let parsed = match &*ob.extract::<PyBackedStr>()? {
1147 "ns" => TimeUnit::Nanoseconds,
1148 "us" => TimeUnit::Microseconds,
1149 "ms" => TimeUnit::Milliseconds,
1150 v => {
1151 return Err(PyValueError::new_err(format!(
1152 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1153 )));
1154 },
1155 };
1156 Ok(Wrap(parsed))
1157 }
1158}
1159
1160impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1161 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1162 let parsed = match &*ob.extract::<PyBackedStr>()? {
1163 "first" => UniqueKeepStrategy::First,
1164 "last" => UniqueKeepStrategy::Last,
1165 "none" => UniqueKeepStrategy::None,
1166 "any" => UniqueKeepStrategy::Any,
1167 v => {
1168 return Err(PyValueError::new_err(format!(
1169 "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1170 )));
1171 },
1172 };
1173 Ok(Wrap(parsed))
1174 }
1175}
1176
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    /// Parses the search-sorted `side` option from a Python string.
    ///
    /// Accepts `"any"`, `"left"` and `"right"`; any other string produces a
    /// `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1193
1194impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1195 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1196 let parsed = match &*ob.extract::<PyBackedStr>()? {
1197 "both" => ClosedInterval::Both,
1198 "left" => ClosedInterval::Left,
1199 "right" => ClosedInterval::Right,
1200 "none" => ClosedInterval::None,
1201 v => {
1202 return Err(PyValueError::new_err(format!(
1203 "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1204 )));
1205 },
1206 };
1207 Ok(Wrap(parsed))
1208 }
1209}
1210
1211impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1212 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1213 let parsed = match &*ob.extract::<PyBackedStr>()? {
1214 "group_to_rows" => WindowMapping::GroupsToRows,
1215 "join" => WindowMapping::Join,
1216 "explode" => WindowMapping::Explode,
1217 v => {
1218 return Err(PyValueError::new_err(format!(
1219 "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1220 )));
1221 },
1222 };
1223 Ok(Wrap(parsed))
1224 }
1225}
1226
1227impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1228 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1229 let parsed = match &*ob.extract::<PyBackedStr>()? {
1230 "1:1" => JoinValidation::OneToOne,
1231 "1:m" => JoinValidation::OneToMany,
1232 "m:m" => JoinValidation::ManyToMany,
1233 "m:1" => JoinValidation::ManyToOne,
1234 v => {
1235 return Err(PyValueError::new_err(format!(
1236 "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1237 )));
1238 },
1239 };
1240 Ok(Wrap(parsed))
1241 }
1242}
1243
1244impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1245 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1246 let parsed = match &*ob.extract::<PyBackedStr>()? {
1247 "none" => MaintainOrderJoin::None,
1248 "left" => MaintainOrderJoin::Left,
1249 "right" => MaintainOrderJoin::Right,
1250 "left_right" => MaintainOrderJoin::LeftRight,
1251 "right_left" => MaintainOrderJoin::RightLeft,
1252 v => {
1253 return Err(PyValueError::new_err(format!(
1254 "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1255 )));
1256 },
1257 };
1258 Ok(Wrap(parsed))
1259 }
1260}
1261
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    /// Parses the `quote_style` option from a Python string.
    ///
    /// Accepts `"always"`, `"necessary"`, `"non_numeric"` and `"never"`; any
    /// other string produces a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "always" => Ok(Wrap(QuoteStyle::Always)),
            "necessary" => Ok(Wrap(QuoteStyle::Necessary)),
            "non_numeric" => Ok(Wrap(QuoteStyle::NonNumeric)),
            "never" => Ok(Wrap(QuoteStyle::Never)),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        }
    }
}
1279
/// Builds [`CloudOptions`] for `uri` from untyped string key/value pairs.
///
/// The pairs are handed to `CloudOptions::from_untyped_config` through a
/// `&mut dyn Iterator`; any failure is converted via [`PyPolarsErr`] into the
/// returned Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    let pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    Ok(CloudOptions::from_untyped_config(uri, pairs).map_err(PyPolarsErr::from)?)
}
1289
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    /// Parses a list set operation from a Python string.
    ///
    /// Accepts `"union"`, `"difference"`, `"intersection"` and
    /// `"symmetric_difference"`; any other string produces a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1307
1308impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1310 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1311 if ob.is_none() {
1312 static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1314
1315 let out = DEFAULT.get_or_try_init(ob.py(), || {
1316 let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1317 .unwrap()
1318 .getattr("ScanCastOptions")
1319 .unwrap()
1320 .call_method0("_default")
1321 .unwrap();
1322
1323 let out = Self::extract_bound(&ob)?;
1324
1325 debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1327
1328 PyResult::Ok(out)
1329 })?;
1330
1331 return Ok(out.clone());
1332 }
1333
1334 let py = ob.py();
1335
1336 let integer_upcast = match &*ob
1337 .getattr(intern!(py, "integer_cast"))?
1338 .extract::<PyBackedStr>()?
1339 {
1340 "upcast" => true,
1341 "forbid" => false,
1342 v => {
1343 return Err(PyValueError::new_err(format!(
1344 "unknown option for integer_cast: {v}"
1345 )));
1346 },
1347 };
1348
1349 let mut float_upcast = false;
1350 let mut float_downcast = false;
1351
1352 let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1353
1354 parse_multiple_options("float_cast", float_cast_object, |v| {
1355 match v {
1356 "forbid" => {},
1357 "upcast" => float_upcast = true,
1358 "downcast" => float_downcast = true,
1359 v => {
1360 return Err(PyValueError::new_err(format!(
1361 "unknown option for float_cast: {v}"
1362 )));
1363 },
1364 }
1365
1366 Ok(())
1367 })?;
1368
1369 let mut datetime_nanoseconds_downcast = false;
1370 let mut datetime_convert_timezone = false;
1371
1372 let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1373
1374 parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1375 match v {
1376 "forbid" => {},
1377 "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1378 "convert-timezone" => datetime_convert_timezone = true,
1379 v => {
1380 return Err(PyValueError::new_err(format!(
1381 "unknown option for datetime_cast: {v}"
1382 )));
1383 },
1384 };
1385
1386 Ok(())
1387 })?;
1388
1389 let missing_struct_fields = match &*ob
1390 .getattr(intern!(py, "missing_struct_fields"))?
1391 .extract::<PyBackedStr>()?
1392 {
1393 "insert" => MissingColumnsPolicy::Insert,
1394 "raise" => MissingColumnsPolicy::Raise,
1395 v => {
1396 return Err(PyValueError::new_err(format!(
1397 "unknown option for missing_struct_fields: {v}"
1398 )));
1399 },
1400 };
1401
1402 let extra_struct_fields = match &*ob
1403 .getattr(intern!(py, "extra_struct_fields"))?
1404 .extract::<PyBackedStr>()?
1405 {
1406 "ignore" => ExtraColumnsPolicy::Ignore,
1407 "raise" => ExtraColumnsPolicy::Raise,
1408 v => {
1409 return Err(PyValueError::new_err(format!(
1410 "unknown option for extra_struct_fields: {v}"
1411 )));
1412 },
1413 };
1414
1415 return Ok(Wrap(CastColumnsPolicy {
1416 integer_upcast,
1417 float_upcast,
1418 float_downcast,
1419 datetime_nanoseconds_downcast,
1420 datetime_microseconds_downcast: false,
1421 datetime_convert_timezone,
1422 null_upcast: true,
1423 missing_struct_fields,
1424 extra_struct_fields,
1425 }));
1426
1427 fn parse_multiple_options(
1428 parameter_name: &'static str,
1429 py_object: Bound<'_, PyAny>,
1430 mut parser_func: impl FnMut(&str) -> PyResult<()>,
1431 ) -> PyResult<()> {
1432 if let Ok(v) = py_object.extract::<PyBackedStr>() {
1433 parser_func(&v)?;
1434 } else if let Ok(v) = py_object.try_iter() {
1435 for v in v {
1436 parser_func(&v?.extract::<PyBackedStr>()?)?;
1437 }
1438 } else {
1439 return Err(PyValueError::new_err(format!(
1440 "unknown type for {parameter_name}: {py_object}"
1441 )));
1442 }
1443
1444 Ok(())
1445 }
1446 }
1447}
1448
1449pub(crate) fn parse_fill_null_strategy(
1450 strategy: &str,
1451 limit: FillNullLimit,
1452) -> PyResult<FillNullStrategy> {
1453 let parsed = match strategy {
1454 "forward" => FillNullStrategy::Forward(limit),
1455 "backward" => FillNullStrategy::Backward(limit),
1456 "min" => FillNullStrategy::Min,
1457 "max" => FillNullStrategy::Max,
1458 "mean" => FillNullStrategy::Mean,
1459 "zero" => FillNullStrategy::Zero,
1460 "one" => FillNullStrategy::One,
1461 e => {
1462 return Err(PyValueError::new_err(format!(
1463 "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1464 )));
1465 },
1466 };
1467 Ok(parsed)
1468}
1469
/// Maps a parquet compression name and optional level to a
/// [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and
/// `"zstd"`; when it is `None` the codec is built without an explicit level.
///
/// # Errors
///
/// Returns a `PyValueError` when `compression` is not a known codec name, when
/// the level does not fit the codec's integer type (it is no longer silently
/// wrapped by an `as` cast), or when the codec's own level validation rejects
/// it.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    /// Converts `lvl` to the integer type a codec expects, rejecting values
    /// that do not fit instead of truncating them.
    fn checked_level<T: TryFrom<i32>>(codec: &str, lvl: i32) -> PyResult<T> {
        T::try_from(lvl).map_err(|_| {
            PyValueError::new_err(format!(
                "invalid {codec} `compression_level`: {lvl} is out of range"
            ))
        })
    }

    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    checked_level::<u8>("gzip", lvl).and_then(|lvl| {
                        GzipLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    checked_level::<u32>("brotli", lvl).and_then(|lvl| {
                        BrotliLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1511
1512pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1513where
1514 I: IntoIterator<Item = S>,
1515 S: AsRef<str>,
1516{
1517 container
1518 .into_iter()
1519 .map(|s| PlSmallStr::from_str(s.as_ref()))
1520 .collect()
1521}
1522
/// Newtype around [`CompatLevel`] that serves as the target type when a
/// Python `compat_level` argument is converted in this module.
#[derive(Debug, Copy, Clone)]
pub struct PyCompatLevel(pub CompatLevel);
1525
1526impl<'py> FromPyObject<'py> for PyCompatLevel {
1527 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1528 Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1529 if let Ok(compat_level) = CompatLevel::with_level(level) {
1530 compat_level
1531 } else {
1532 return Err(PyValueError::new_err("invalid compat level"));
1533 }
1534 } else if let Ok(future) = ob.extract::<bool>() {
1535 if future {
1536 CompatLevel::newest()
1537 } else {
1538 CompatLevel::oldest()
1539 }
1540 } else {
1541 return Err(PyTypeError::new_err(
1542 "'compat_level' argument accepts int or bool",
1543 ));
1544 }))
1545 }
1546}
1547
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    /// Parses the normalization `form` option from a Python string.
    ///
    /// Accepts `"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"` (case-sensitive); any
    /// other string produces a `PyValueError`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "NFC" => Ok(Wrap(UnicodeForm::NFC)),
            "NFKC" => Ok(Wrap(UnicodeForm::NFKC)),
            "NFD" => Ok(Wrap(UnicodeForm::NFD)),
            "NFKD" => Ok(Wrap(UnicodeForm::NFKD)),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        }
    }
}
1565
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    /// Converts optional parquet key/value metadata coming from Python.
    ///
    /// `None` stays `None`. A list of `(str, str)` pairs becomes static
    /// metadata; any other object is handed to
    /// `KeyValueMetadata::from_py_function`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let key_value_metadata = match Option::<Metadata>::extract_bound(ob)? {
            None => None,
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
        };
        Ok(Wrap(key_value_metadata))
    }
}
1583
1584impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1585 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1586 let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1587
1588 let tz = tz.map(|x| x.0);
1589
1590 Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1591 }
1592}
1593
1594impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1595 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1596 let parsed = match &*ob.extract::<PyBackedStr>()? {
1597 "upcast" => UpcastOrForbid::Upcast,
1598 "forbid" => UpcastOrForbid::Forbid,
1599 v => {
1600 return Err(PyValueError::new_err(format!(
1601 "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1602 )));
1603 },
1604 };
1605 Ok(Wrap(parsed))
1606 }
1607}
1608
1609impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1610 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1611 let parsed = match &*ob.extract::<PyBackedStr>()? {
1612 "ignore" => ExtraColumnsPolicy::Ignore,
1613 "raise" => ExtraColumnsPolicy::Raise,
1614 v => {
1615 return Err(PyValueError::new_err(format!(
1616 "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1617 )));
1618 },
1619 };
1620 Ok(Wrap(parsed))
1621 }
1622}
1623
1624impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1625 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1626 let parsed = match &*ob.extract::<PyBackedStr>()? {
1627 "insert" => MissingColumnsPolicy::Insert,
1628 "raise" => MissingColumnsPolicy::Raise,
1629 v => {
1630 return Err(PyValueError::new_err(format!(
1631 "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1632 )));
1633 },
1634 };
1635 Ok(Wrap(parsed))
1636 }
1637}
1638
1639impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1640 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1641 if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1642 return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1643 }
1644
1645 let parsed = match &*ob.extract::<PyBackedStr>()? {
1646 "insert" => MissingColumnsPolicyOrExpr::Insert,
1647 "raise" => MissingColumnsPolicyOrExpr::Raise,
1648 v => {
1649 return Err(PyValueError::new_err(format!(
1650 "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1651 )));
1652 },
1653 };
1654 Ok(Wrap(parsed))
1655 }
1656}
1657
1658impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1659 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1660 let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1661
1662 Ok(Wrap(match &*column_mapping_type {
1663 "iceberg-column-mapping" => {
1664 let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1665 ColumnMapping::Iceberg(Arc::new(
1666 IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1667 ))
1668 },
1669
1670 v => {
1671 return Err(PyValueError::new_err(format!(
1672 "unknown column mapping type: {v}"
1673 )));
1674 },
1675 }))
1676 }
1677}
1678
1679impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1680 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1681 let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1682
1683 Ok(Wrap(match &*deletion_file_type {
1684 "iceberg-position-delete" => {
1685 let dict: Bound<'_, PyDict> = ob.extract()?;
1686
1687 let mut out = PlIndexMap::new();
1688
1689 for (k, v) in dict
1690 .try_iter()?
1691 .zip(dict.call_method0("values")?.try_iter()?)
1692 {
1693 let k: usize = k?.extract()?;
1694 let v: Bound<'_, PyAny> = v?.extract()?;
1695
1696 let files = v
1697 .try_iter()?
1698 .map(|x| {
1699 x.and_then(|x| {
1700 let x: String = x.extract()?;
1701 Ok(x)
1702 })
1703 })
1704 .collect::<PyResult<Arc<[String]>>>()?;
1705
1706 if !files.is_empty() {
1707 out.insert(k, files);
1708 }
1709 }
1710
1711 DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1712 },
1713
1714 v => {
1715 return Err(PyValueError::new_err(format!(
1716 "unknown deletion file type: {v}"
1717 )));
1718 },
1719 }))
1720 }
1721}
1722
1723impl<'py> FromPyObject<'py> for Wrap<DefaultFieldValues> {
1724 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1725 let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1726
1727 Ok(Wrap(match &*default_values_type {
1728 "iceberg" => {
1729 let dict: Bound<'_, PyDict> = ob.extract()?;
1730
1731 let mut out = PlIndexMap::new();
1732
1733 for (k, v) in dict
1734 .try_iter()?
1735 .zip(dict.call_method0("values")?.try_iter()?)
1736 {
1737 let k: u32 = k?.extract()?;
1738 let v = v?;
1739
1740 let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1741 Ok(s.into_column())
1742 } else {
1743 let err_msg: String = v.extract()?;
1744 Err(err_msg)
1745 };
1746
1747 out.insert(k, v);
1748 }
1749
1750 DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1751 out,
1752 )))
1753 },
1754
1755 v => {
1756 return Err(PyValueError::new_err(format!(
1757 "unknown deletion file type: {v}"
1758 )));
1759 },
1760 }))
1761 }
1762}
1763
1764impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1765 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1766 if let Ok(path) = ob.extract::<PyBackedStr>() {
1767 Ok(Wrap(PlPath::new(&path)))
1768 } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1769 Ok(Wrap(PlPath::Local(path.into())))
1770 } else {
1771 Err(
1772 PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1773 .into(),
1774 )
1775 }
1776 }
1777}
1778
1779impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1780 type Target = PyString;
1781 type Output = Bound<'py, Self::Target>;
1782 type Error = Infallible;
1783
1784 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1785 self.0.to_str().into_pyobject(py)
1786 }
1787}