1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17#[cfg(feature = "cloud")]
18use polars::io::cloud::CloudOptions;
19use polars::prelude::ColumnMapping;
20use polars::prelude::deletion::DeletionFilesList;
21use polars::series::ops::NullBehavior;
22use polars_core::schema::iceberg::IcebergSchema;
23use polars_core::utils::arrow::array::Array;
24use polars_core::utils::arrow::types::NativeType;
25use polars_core::utils::materialize_dyn_int;
26use polars_lazy::prelude::*;
27#[cfg(feature = "parquet")]
28use polars_parquet::write::StatisticsOptions;
29use polars_plan::dsl::ScanSources;
30use polars_utils::mmap::MemSlice;
31use polars_utils::pl_str::PlSmallStr;
32use polars_utils::total_ord::{TotalEq, TotalHash};
33use pyo3::basic::CompareOp;
34use pyo3::exceptions::{PyTypeError, PyValueError};
35use pyo3::intern;
36use pyo3::prelude::*;
37use pyo3::pybacked::PyBackedStr;
38use pyo3::sync::GILOnceCell;
39use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
40
41use crate::error::PyPolarsErr;
42use crate::expr::PyExpr;
43use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
44use crate::interop::arrow::to_rust::field_to_rust_arrow;
45#[cfg(feature = "object")]
46use crate::object::OBJECT_NAME;
47use crate::prelude::*;
48use crate::py_modules::{pl_series, polars};
49use crate::series::PySeries;
50use crate::utils::to_py_err;
51use crate::{PyDataFrame, PyLazyFrame};
52
/// Marker for types whose values may be reinterpreted in place as `Target`.
///
/// # Safety
///
/// `reinterpret_vec` rebuilds the allocation of a `Vec<Self>` as a `Vec<Self::Target>` after
/// checking only that size and alignment agree, so an implementor must guarantee that `Self`
/// and `Target` share the same in-memory representation.
pub(crate) unsafe trait Transparent {
    type Target;
}

// NOTE(review): presumably `PySeries` is a `#[repr(transparent)]` wrapper around `Series`;
// its definition is not visible in this file — confirm.
unsafe impl Transparent for PySeries {
    type Target = Series;
}

// SAFETY: `Wrap<T>` is declared `#[repr(transparent)]` over its single `T` field.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}

// NOTE(review): relies on `Option<T>` and `Option<T::Target>` having the same layout whenever
// `T` and `T::Target` do — confirm this holds for every implementor.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
70
71pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
72 assert_eq!(size_of::<T>(), size_of::<T::Target>());
73 assert_eq!(align_of::<T>(), align_of::<T::Target>());
74 let len = input.len();
75 let cap = input.capacity();
76 let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
77 let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
78 let ptr: *mut T::Target = vec_ptr as *mut T::Target;
79 unsafe { Vec::from_raw_parts(ptr, len, cap) }
80}
81
82pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
83 reinterpret_vec(buf)
84}
85
/// Transparent newtype around `T`.
///
/// This file attaches its Python conversion trait implementations to `Wrap<T>` rather than
/// to `T` itself. The wrapper has the same layout as the wrapped value.
#[repr(transparent)]
#[derive(PartialEq, Eq, Hash)]
pub struct Wrap<T>(pub T);

impl<T: Clone> Clone for Wrap<T> {
    /// Clones the wrapped value and wraps the copy.
    fn clone(&self) -> Self {
        let Self(inner) = self;
        Self(inner.clone())
    }
}

impl<T> From<T> for Wrap<T> {
    /// Wraps `t`.
    fn from(t: T) -> Self {
        Self(t)
    }
}
103
104pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
106 let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
107 Ok(pydf.extract::<PyDataFrame>()?.df)
108}
109
110pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
111 let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
112 Ok(pydf.extract::<PyLazyFrame>()?.ldf)
113}
114
115pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
116 let s = obj.getattr(intern!(obj.py(), "_s"))?;
117 Ok(s.extract::<PySeries>()?.series)
118}
119
120pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
121 let series = pl_series(py).bind(py);
122 let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
123 constructor.call1((s,))
124}
125
126impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
127 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
128 Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
129 }
130}
131
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    /// Builds a `NullValues` from one of three accepted Python shapes, tried in this order:
    ///
    /// 1. a single string -> `NullValues::AllColumnsSingle`
    /// 2. a sequence of strings -> `NullValues::AllColumns`
    /// 3. a sequence of `(str, str)` pairs -> `NullValues::Named`
    ///
    /// # Errors
    ///
    /// Returns a `PyPolarsErr::Other` error when none of the shapes match.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(many) = ob.extract::<Vec<PyBackedStr>>() {
            let values = many.into_iter().map(|value| (&*value).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let named = pairs
                .into_iter()
                .map(|(first, second)| ((&*first).into(), (&*second).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(named)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
155
156fn struct_dict<'a, 'py>(
157 py: Python<'py>,
158 vals: impl Iterator<Item = AnyValue<'a>>,
159 flds: &[Field],
160) -> PyResult<Bound<'py, PyDict>> {
161 let dict = PyDict::new(py);
162 flds.iter().zip(vals).try_for_each(|(fld, val)| {
163 dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
164 })?;
165 Ok(dict)
166}
167
168fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
170 const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
171 let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
174 let mut buffer = itoa::Buffer::new();
175 let value = buffer.format(v);
176 let len = value.len();
177 for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
178 *dst = *src
179 }
180
181 let ptr = buf.as_mut_ptr() as *mut i128;
182 unsafe {
183 *ptr -= ZEROS;
185 *ptr.add(1) -= ZEROS;
186 *ptr.add(2) -= ZEROS;
187 }
188 len
189}
190
191impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
192 type Target = PyAny;
193 type Output = Bound<'py, Self::Target>;
194 type Error = PyErr;
195
196 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
197 let pl = polars(py).bind(py);
198
199 match &self.0 {
200 DataType::Int8 => {
201 let class = pl.getattr(intern!(py, "Int8"))?;
202 class.call0()
203 },
204 DataType::Int16 => {
205 let class = pl.getattr(intern!(py, "Int16"))?;
206 class.call0()
207 },
208 DataType::Int32 => {
209 let class = pl.getattr(intern!(py, "Int32"))?;
210 class.call0()
211 },
212 DataType::Int64 => {
213 let class = pl.getattr(intern!(py, "Int64"))?;
214 class.call0()
215 },
216 DataType::UInt8 => {
217 let class = pl.getattr(intern!(py, "UInt8"))?;
218 class.call0()
219 },
220 DataType::UInt16 => {
221 let class = pl.getattr(intern!(py, "UInt16"))?;
222 class.call0()
223 },
224 DataType::UInt32 => {
225 let class = pl.getattr(intern!(py, "UInt32"))?;
226 class.call0()
227 },
228 DataType::UInt64 => {
229 let class = pl.getattr(intern!(py, "UInt64"))?;
230 class.call0()
231 },
232 DataType::Int128 => {
233 let class = pl.getattr(intern!(py, "Int128"))?;
234 class.call0()
235 },
236 DataType::Float32 => {
237 let class = pl.getattr(intern!(py, "Float32"))?;
238 class.call0()
239 },
240 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
241 let class = pl.getattr(intern!(py, "Float64"))?;
242 class.call0()
243 },
244 DataType::Decimal(precision, scale) => {
245 let class = pl.getattr(intern!(py, "Decimal"))?;
246 let args = (*precision, *scale);
247 class.call1(args)
248 },
249 DataType::Boolean => {
250 let class = pl.getattr(intern!(py, "Boolean"))?;
251 class.call0()
252 },
253 DataType::String | DataType::Unknown(UnknownKind::Str) => {
254 let class = pl.getattr(intern!(py, "String"))?;
255 class.call0()
256 },
257 DataType::Binary => {
258 let class = pl.getattr(intern!(py, "Binary"))?;
259 class.call0()
260 },
261 DataType::Array(inner, size) => {
262 let class = pl.getattr(intern!(py, "Array"))?;
263 let inner = Wrap(*inner.clone());
264 let args = (&inner, *size);
265 class.call1(args)
266 },
267 DataType::List(inner) => {
268 let class = pl.getattr(intern!(py, "List"))?;
269 let inner = Wrap(*inner.clone());
270 class.call1((&inner,))
271 },
272 DataType::Date => {
273 let class = pl.getattr(intern!(py, "Date"))?;
274 class.call0()
275 },
276 DataType::Datetime(tu, tz) => {
277 let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
278 datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
279 },
280 DataType::Duration(tu) => {
281 let duration_class = pl.getattr(intern!(py, "Duration"))?;
282 duration_class.call1((tu.to_ascii(),))
283 },
284 #[cfg(feature = "object")]
285 DataType::Object(_) => {
286 let class = pl.getattr(intern!(py, "Object"))?;
287 class.call0()
288 },
289 DataType::Categorical(cats, _) => {
290 let categories_class = pl.getattr(intern!(py, "Categories"))?;
291 let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
292 let categories = categories_class
293 .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
294 let kwargs = [("categories", categories)];
295 categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
296 },
297 DataType::Enum(_, mapping) => {
298 let categories = unsafe {
299 StringChunked::from_chunks(
300 PlSmallStr::from_static("category"),
301 vec![mapping.to_arrow(true)],
302 )
303 };
304 let class = pl.getattr(intern!(py, "Enum"))?;
305 let series = to_series(py, categories.into_series().into())?;
306 class.call1((series,))
307 },
308 DataType::Time => pl.getattr(intern!(py, "Time")),
309 DataType::Struct(fields) => {
310 let field_class = pl.getattr(intern!(py, "Field"))?;
311 let iter = fields.iter().map(|fld| {
312 let name = fld.name().as_str();
313 let dtype = Wrap(fld.dtype().clone());
314 field_class.call1((name, &dtype)).unwrap()
315 });
316 let fields = PyList::new(py, iter)?;
317 let struct_class = pl.getattr(intern!(py, "Struct"))?;
318 struct_class.call1((fields,))
319 },
320 DataType::Null => {
321 let class = pl.getattr(intern!(py, "Null"))?;
322 class.call0()
323 },
324 DataType::Unknown(UnknownKind::Int(v)) => {
325 Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
326 },
327 DataType::Unknown(_) => {
328 let class = pl.getattr(intern!(py, "Unknown"))?;
329 class.call0()
330 },
331 DataType::BinaryOffset => {
332 unimplemented!()
333 },
334 }
335 }
336}
337
338impl<'py> FromPyObject<'py> for Wrap<Field> {
339 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
340 let py = ob.py();
341 let name = ob
342 .getattr(intern!(py, "name"))?
343 .str()?
344 .extract::<PyBackedStr>()?;
345 let dtype = ob
346 .getattr(intern!(py, "dtype"))?
347 .extract::<Wrap<DataType>>()?;
348 Ok(Wrap(Field::new((&*name).into(), dtype.0)))
349 }
350}
351
352impl<'py> FromPyObject<'py> for Wrap<DataType> {
353 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
354 let py = ob.py();
355 let type_name = ob.get_type().qualname()?.to_string();
356
357 let dtype = match &*type_name {
358 "DataTypeClass" => {
359 let name = ob
361 .getattr(intern!(py, "__name__"))?
362 .str()?
363 .extract::<PyBackedStr>()?;
364 match &*name {
365 "Int8" => DataType::Int8,
366 "Int16" => DataType::Int16,
367 "Int32" => DataType::Int32,
368 "Int64" => DataType::Int64,
369 "Int128" => DataType::Int128,
370 "UInt8" => DataType::UInt8,
371 "UInt16" => DataType::UInt16,
372 "UInt32" => DataType::UInt32,
373 "UInt64" => DataType::UInt64,
374 "Float32" => DataType::Float32,
375 "Float64" => DataType::Float64,
376 "Boolean" => DataType::Boolean,
377 "String" => DataType::String,
378 "Binary" => DataType::Binary,
379 "Categorical" => DataType::from_categories(Categories::global()),
380 "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
381 "Date" => DataType::Date,
382 "Time" => DataType::Time,
383 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
384 "Duration" => DataType::Duration(TimeUnit::Microseconds),
385 "Decimal" => DataType::Decimal(None, None), "List" => DataType::List(Box::new(DataType::Null)),
387 "Array" => DataType::Array(Box::new(DataType::Null), 0),
388 "Struct" => DataType::Struct(vec![]),
389 "Null" => DataType::Null,
390 #[cfg(feature = "object")]
391 "Object" => DataType::Object(OBJECT_NAME),
392 "Unknown" => DataType::Unknown(Default::default()),
393 dt => {
394 return Err(PyTypeError::new_err(format!(
395 "'{dt}' is not a Polars data type",
396 )));
397 },
398 }
399 },
400 "Int8" => DataType::Int8,
401 "Int16" => DataType::Int16,
402 "Int32" => DataType::Int32,
403 "Int64" => DataType::Int64,
404 "Int128" => DataType::Int128,
405 "UInt8" => DataType::UInt8,
406 "UInt16" => DataType::UInt16,
407 "UInt32" => DataType::UInt32,
408 "UInt64" => DataType::UInt64,
409 "Float32" => DataType::Float32,
410 "Float64" => DataType::Float64,
411 "Boolean" => DataType::Boolean,
412 "String" => DataType::String,
413 "Binary" => DataType::Binary,
414 "Categorical" => {
415 let categories = ob.getattr(intern!(py, "categories")).unwrap();
416 let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
417 let py_categories = py_categories.extract::<PyCategories>()?;
418 DataType::from_categories(py_categories.categories().clone())
419 },
420 "Enum" => {
421 let categories = ob.getattr(intern!(py, "categories")).unwrap();
422 let s = get_series(&categories.as_borrowed())?;
423 let ca = s.str().map_err(PyPolarsErr::from)?;
424 let categories = ca.downcast_iter().next().unwrap().clone();
425 assert!(!categories.has_nulls());
426 DataType::from_frozen_categories(
427 FrozenCategories::new(categories.values_iter()).unwrap(),
428 )
429 },
430 "Date" => DataType::Date,
431 "Time" => DataType::Time,
432 "Datetime" => {
433 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
434 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
435 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
436 let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
437 DataType::Datetime(
438 time_unit,
439 TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
440 )
441 },
442 "Duration" => {
443 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
444 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
445 DataType::Duration(time_unit)
446 },
447 "Decimal" => {
448 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
449 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
450 DataType::Decimal(precision, Some(scale))
451 },
452 "List" => {
453 let inner = ob.getattr(intern!(py, "inner")).unwrap();
454 let inner = inner.extract::<Wrap<DataType>>()?;
455 DataType::List(Box::new(inner.0))
456 },
457 "Array" => {
458 let inner = ob.getattr(intern!(py, "inner")).unwrap();
459 let size = ob.getattr(intern!(py, "size")).unwrap();
460 let inner = inner.extract::<Wrap<DataType>>()?;
461 let size = size.extract::<usize>()?;
462 DataType::Array(Box::new(inner.0), size)
463 },
464 "Struct" => {
465 let fields = ob.getattr(intern!(py, "fields"))?;
466 let fields = fields
467 .extract::<Vec<Wrap<Field>>>()?
468 .into_iter()
469 .map(|f| f.0)
470 .collect::<Vec<Field>>();
471 DataType::Struct(fields)
472 },
473 "Null" => DataType::Null,
474 #[cfg(feature = "object")]
475 "Object" => DataType::Object(OBJECT_NAME),
476 "Unknown" => DataType::Unknown(Default::default()),
477 dt => {
478 return Err(PyTypeError::new_err(format!(
479 "'{dt}' is not a Polars data type",
480 )));
481 },
482 };
483 Ok(Wrap(dtype))
484 }
485}
486
487enum CategoricalOrdering {
488 Lexical,
489}
490
491impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
492 type Target = PyString;
493 type Output = Bound<'py, Self::Target>;
494 type Error = Infallible;
495
496 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
497 "lexical".into_pyobject(py)
498 }
499}
500
501impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
502 type Target = PyString;
503 type Output = Bound<'py, Self::Target>;
504 type Error = Infallible;
505
506 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
507 self.0.to_ascii().into_pyobject(py)
508 }
509}
510
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    /// Builds `StatisticsOptions` from a Python `dict` of `str -> bool`.
    ///
    /// Starts from `StatisticsOptions::empty()` and sets the flag named by each key:
    /// `"min"`, `"max"`, `"distinct_count"` or `"null_count"`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a dict, if a key is not a string or a value is not a bool, and with
    /// `TypeError` for an unrecognized key.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in ob.downcast::<PyDict>()?.iter() {
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            // Pick the flag that this key controls, then assign once below.
            let flag = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
537
538impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
539 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
540 let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
541 let vals = reinterpret_vec(vals);
542 Ok(Wrap(Row(vals)))
543 }
544}
545
546impl<'py> FromPyObject<'py> for Wrap<Schema> {
547 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
548 let dict = ob.downcast::<PyDict>()?;
549
550 Ok(Wrap(
551 dict.iter()
552 .map(|(key, val)| {
553 let key = key.extract::<PyBackedStr>()?;
554 let val = val.extract::<Wrap<DataType>>()?;
555
556 Ok(Field::new((&*key).into(), val.0))
557 })
558 .collect::<PyResult<Schema>>()?,
559 ))
560 }
561}
562
563impl<'py> FromPyObject<'py> for Wrap<ArrowSchema> {
564 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
565 let py = ob.py();
566
567 let pyarrow_schema_cls = py
568 .import(intern!(py, "pyarrow"))?
569 .getattr(intern!(py, "Schema"))?;
570
571 if ob.is_none() {
572 return Err(PyValueError::new_err("arrow_schema() returned None").into());
573 }
574
575 let schema_cls = ob.getattr(intern!(py, "__class__"))?;
576
577 if !schema_cls.is(&pyarrow_schema_cls) {
578 return Err(PyTypeError::new_err(format!(
579 "expected pyarrow.Schema, got: {schema_cls}"
580 )));
581 }
582
583 let mut iter = ob.try_iter()?.map(|x| x.and_then(field_to_rust_arrow));
584
585 let mut last_err = None;
586
587 let schema =
588 ArrowSchema::from_iter_check_duplicates(std::iter::from_fn(|| match iter.next() {
589 Some(Ok(v)) => Some(v),
590 Some(Err(e)) => {
591 last_err = Some(e);
592 None
593 },
594 None => None,
595 }))
596 .map_err(to_py_err)?;
597
598 if let Some(last_err) = last_err {
599 return Err(last_err.into());
600 }
601
602 Ok(Wrap(schema))
603 }
604}
605
606impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
607 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
608 let list = ob.downcast::<PyList>()?.to_owned();
609
610 if list.is_empty() {
611 return Ok(Wrap(ScanSources::default()));
612 }
613
614 enum MutableSources {
615 Paths(Vec<PlPath>),
616 Files(Vec<File>),
617 Buffers(Vec<MemSlice>),
618 }
619
620 let num_items = list.len();
621 let mut iter = list
622 .into_iter()
623 .map(|val| get_python_scan_source_input(val.unbind(), false));
624
625 let Some(first) = iter.next() else {
626 return Ok(Wrap(ScanSources::default()));
627 };
628
629 let mut sources = match first? {
630 PythonScanSourceInput::Path(path) => {
631 let mut sources = Vec::with_capacity(num_items);
632 sources.push(path);
633 MutableSources::Paths(sources)
634 },
635 PythonScanSourceInput::File(file) => {
636 let mut sources = Vec::with_capacity(num_items);
637 sources.push(file.into());
638 MutableSources::Files(sources)
639 },
640 PythonScanSourceInput::Buffer(buffer) => {
641 let mut sources = Vec::with_capacity(num_items);
642 sources.push(buffer);
643 MutableSources::Buffers(sources)
644 },
645 };
646
647 for source in iter {
648 match (&mut sources, source?) {
649 (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
650 (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
651 (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
652 _ => {
653 return Err(PyTypeError::new_err(
654 "Cannot combine in-memory bytes, paths and files for scan sources",
655 ));
656 },
657 }
658 }
659
660 Ok(Wrap(match sources {
661 MutableSources::Paths(i) => ScanSources::Paths(i.into()),
662 MutableSources::Files(i) => ScanSources::Files(i.into()),
663 MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
664 }))
665 }
666}
667
668impl<'py> IntoPyObject<'py> for Wrap<Schema> {
669 type Target = PyDict;
670 type Output = Bound<'py, Self::Target>;
671 type Error = PyErr;
672
673 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
674 let dict = PyDict::new(py);
675 self.0
676 .iter()
677 .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
678 Ok(dict)
679 }
680}
681
/// Owned handle to an arbitrary Python object.
///
/// Declared `#[repr(transparent)]`, so it has the same layout as its single `PyObject` field.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The wrapped Python object.
    pub inner: PyObject,
}
687
688impl Clone for ObjectValue {
689 fn clone(&self) -> Self {
690 Python::with_gil(|py| Self {
691 inner: self.inner.clone_ref(py),
692 })
693 }
694}
695
696impl Hash for ObjectValue {
697 fn hash<H: Hasher>(&self, state: &mut H) {
698 let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
699 state.write_isize(h)
700 }
701}
702
703impl Eq for ObjectValue {}
704
705impl PartialEq for ObjectValue {
706 fn eq(&self, other: &Self) -> bool {
707 Python::with_gil(|py| {
708 match self
709 .inner
710 .bind(py)
711 .rich_compare(other.inner.bind(py), CompareOp::Eq)
712 {
713 Ok(result) => result.is_truthy().unwrap(),
714 Err(_) => false,
715 }
716 })
717 }
718}
719
720impl TotalEq for ObjectValue {
721 fn tot_eq(&self, other: &Self) -> bool {
722 self == other
723 }
724}
725
726impl TotalHash for ObjectValue {
727 fn tot_hash<H>(&self, state: &mut H)
728 where
729 H: Hasher,
730 {
731 self.hash(state);
732 }
733}
734
735impl Display for ObjectValue {
736 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
737 write!(f, "{}", self.inner)
738 }
739}
740
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// Type name reported for this object type: always `"object"`.
    fn type_name() -> &'static str {
        "object"
    }
}
747
748impl From<PyObject> for ObjectValue {
749 fn from(p: PyObject) -> Self {
750 Self { inner: p }
751 }
752}
753
754impl<'py> FromPyObject<'py> for ObjectValue {
755 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
756 Ok(ObjectValue {
757 inner: ob.to_owned().unbind(),
758 })
759 }
760}
761
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    /// Reinterprets a type-erased polars object reference as an `&ObjectValue`.
    ///
    /// No runtime type check is performed.
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let erased = val as *const dyn PolarsObjectSafe as *const ObjectValue;
        // NOTE(review): soundness depends on every `dyn PolarsObjectSafe` that reaches this
        // conversion actually being an `ObjectValue` — confirm against the callers.
        unsafe { &*erased }
    }
}
771
772impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
773 type Target = PyAny;
774 type Output = Borrowed<'a, 'py, Self::Target>;
775 type Error = std::convert::Infallible;
776
777 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
778 Ok(self.inner.bind_borrowed(py))
779 }
780}
781
782impl Default for ObjectValue {
783 fn default() -> Self {
784 Python::with_gil(|py| ObjectValue { inner: py.None() })
785 }
786}
787
788impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
789 fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
790 let seq = obj.downcast::<PySequence>()?;
791 let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
792 for item in seq.try_iter()? {
793 v.push(item?.extract::<T>()?);
794 }
795 Ok(Wrap(v))
796 }
797}
798
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    /// Parses the asof `strategy` keyword: `"backward"`, `"forward"` or `"nearest"`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a string, and with `ValueError` for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let keyword = ob.extract::<PyBackedStr>()?;
        let strategy = match &*keyword {
            "backward" => Ok(AsofStrategy::Backward),
            "forward" => Ok(AsofStrategy::Forward),
            "nearest" => Ok(AsofStrategy::Nearest),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        };
        strategy.map(Wrap)
    }
}
815
816impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
817 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
818 let parsed = match &*(ob.extract::<PyBackedStr>()?) {
819 "linear" => InterpolationMethod::Linear,
820 "nearest" => InterpolationMethod::Nearest,
821 v => {
822 return Err(PyValueError::new_err(format!(
823 "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
824 )));
825 },
826 };
827 Ok(Wrap(parsed))
828 }
829}
830
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    /// Parses the avro `compression` keyword; `"uncompressed"` maps to `None`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a string, and with `ValueError` for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let keyword = ob.extract::<PyBackedStr>()?;
        let compression = match &*keyword {
            "uncompressed" => Ok(None),
            "snappy" => Ok(Some(AvroCompression::Snappy)),
            "deflate" => Ok(Some(AvroCompression::Deflate)),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        };
        compression.map(Wrap)
    }
}
847
848impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
849 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
850 let parsed = match &*ob.extract::<PyBackedStr>()? {
851 "lexical" => CategoricalOrdering::Lexical,
852 "physical" => {
853 polars_warn!(
854 Deprecation,
855 "physical ordering is deprecated, will use lexical ordering instead"
856 );
857 CategoricalOrdering::Lexical
858 },
859 v => {
860 return Err(PyValueError::new_err(format!(
861 "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
862 )));
863 },
864 };
865 Ok(Wrap(parsed))
866 }
867}
868
869impl<'py> FromPyObject<'py> for Wrap<StartBy> {
870 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
871 let parsed = match &*ob.extract::<PyBackedStr>()? {
872 "window" => StartBy::WindowBound,
873 "datapoint" => StartBy::DataPoint,
874 "monday" => StartBy::Monday,
875 "tuesday" => StartBy::Tuesday,
876 "wednesday" => StartBy::Wednesday,
877 "thursday" => StartBy::Thursday,
878 "friday" => StartBy::Friday,
879 "saturday" => StartBy::Saturday,
880 "sunday" => StartBy::Sunday,
881 v => {
882 return Err(PyValueError::new_err(format!(
883 "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
884 )));
885 },
886 };
887 Ok(Wrap(parsed))
888 }
889}
890
891impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
892 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
893 let parsed = match &*ob.extract::<PyBackedStr>()? {
894 "left" => ClosedWindow::Left,
895 "right" => ClosedWindow::Right,
896 "both" => ClosedWindow::Both,
897 "none" => ClosedWindow::None,
898 v => {
899 return Err(PyValueError::new_err(format!(
900 "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
901 )));
902 },
903 };
904 Ok(Wrap(parsed))
905 }
906}
907
908impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
909 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
910 let parsed = match &*ob.extract::<PyBackedStr>()? {
911 "half_to_even" => RoundMode::HalfToEven,
912 "half_away_from_zero" => RoundMode::HalfAwayFromZero,
913 v => {
914 return Err(PyValueError::new_err(format!(
915 "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
916 )));
917 },
918 };
919 Ok(Wrap(parsed))
920 }
921}
922
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    /// Parses the csv `encoding` keyword: `"utf8"` or `"utf8-lossy"`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a string, and with `ValueError` for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let keyword = ob.extract::<PyBackedStr>()?;
        let encoding = match &*keyword {
            "utf8" => Ok(CsvEncoding::Utf8),
            "utf8-lossy" => Ok(CsvEncoding::LossyUtf8),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        };
        encoding.map(Wrap)
    }
}
938
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    /// Parses the ipc `compression` keyword; `"uncompressed"` maps to `None`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a string, and with `ValueError` for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let keyword = ob.extract::<PyBackedStr>()?;
        let compression = match &*keyword {
            "uncompressed" => Ok(None),
            "lz4" => Ok(Some(IpcCompression::LZ4)),
            "zstd" => Ok(Some(IpcCompression::ZSTD)),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        };
        compression.map(Wrap)
    }
}
955
956impl<'py> FromPyObject<'py> for Wrap<JoinType> {
957 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
958 let parsed = match &*ob.extract::<PyBackedStr>()? {
959 "inner" => JoinType::Inner,
960 "left" => JoinType::Left,
961 "right" => JoinType::Right,
962 "full" => JoinType::Full,
963 "semi" => JoinType::Semi,
964 "anti" => JoinType::Anti,
965 #[cfg(feature = "cross_join")]
966 "cross" => JoinType::Cross,
967 v => {
968 return Err(PyValueError::new_err(format!(
969 "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
970 )));
971 },
972 };
973 Ok(Wrap(parsed))
974 }
975}
976
977impl<'py> FromPyObject<'py> for Wrap<Label> {
978 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
979 let parsed = match &*ob.extract::<PyBackedStr>()? {
980 "left" => Label::Left,
981 "right" => Label::Right,
982 "datapoint" => Label::DataPoint,
983 v => {
984 return Err(PyValueError::new_err(format!(
985 "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
986 )));
987 },
988 };
989 Ok(Wrap(parsed))
990 }
991}
992
993impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
994 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
995 let parsed = match &*ob.extract::<PyBackedStr>()? {
996 "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
997 "max_width" => ListToStructWidthStrategy::MaxWidth,
998 v => {
999 return Err(PyValueError::new_err(format!(
1000 "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1001 )));
1002 },
1003 };
1004 Ok(Wrap(parsed))
1005 }
1006}
1007
1008impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
1009 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1010 let parsed = match &*ob.extract::<PyBackedStr>()? {
1011 "null" => NonExistent::Null,
1012 "raise" => NonExistent::Raise,
1013 v => {
1014 return Err(PyValueError::new_err(format!(
1015 "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1016 )));
1017 },
1018 };
1019 Ok(Wrap(parsed))
1020 }
1021}
1022
1023impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
1024 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1025 let parsed = match &*ob.extract::<PyBackedStr>()? {
1026 "drop" => NullBehavior::Drop,
1027 "ignore" => NullBehavior::Ignore,
1028 v => {
1029 return Err(PyValueError::new_err(format!(
1030 "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1031 )));
1032 },
1033 };
1034 Ok(Wrap(parsed))
1035 }
1036}
1037
1038impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
1039 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1040 let parsed = match &*ob.extract::<PyBackedStr>()? {
1041 "ignore" => NullStrategy::Ignore,
1042 "propagate" => NullStrategy::Propagate,
1043 v => {
1044 return Err(PyValueError::new_err(format!(
1045 "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1046 )));
1047 },
1048 };
1049 Ok(Wrap(parsed))
1050 }
1051}
1052
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    /// Parses the `parallel` keyword: `"auto"`, `"columns"`, `"row_groups"`, `"prefiltered"`
    /// or `"none"`.
    ///
    /// # Errors
    ///
    /// Fails if `ob` is not a string, and with `ValueError` for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let keyword = ob.extract::<PyBackedStr>()?;
        let strategy = match &*keyword {
            "auto" => Ok(ParallelStrategy::Auto),
            "columns" => Ok(ParallelStrategy::Columns),
            "row_groups" => Ok(ParallelStrategy::RowGroups),
            "prefiltered" => Ok(ParallelStrategy::Prefiltered),
            "none" => Ok(ParallelStrategy::None),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        };
        strategy.map(Wrap)
    }
}
1071
1072impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1073 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1074 let parsed = match &*ob.extract::<PyBackedStr>()? {
1075 "fortran" => IndexOrder::Fortran,
1076 "c" => IndexOrder::C,
1077 v => {
1078 return Err(PyValueError::new_err(format!(
1079 "`order` must be one of {{'fortran', 'c'}}, got {v}",
1080 )));
1081 },
1082 };
1083 Ok(Wrap(parsed))
1084 }
1085}
1086
1087impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1088 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1089 let parsed = match &*ob.extract::<PyBackedStr>()? {
1090 "lower" => QuantileMethod::Lower,
1091 "higher" => QuantileMethod::Higher,
1092 "nearest" => QuantileMethod::Nearest,
1093 "linear" => QuantileMethod::Linear,
1094 "midpoint" => QuantileMethod::Midpoint,
1095 "equiprobable" => QuantileMethod::Equiprobable,
1096 v => {
1097 return Err(PyValueError::new_err(format!(
1098 "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1099 )));
1100 },
1101 };
1102 Ok(Wrap(parsed))
1103 }
1104}
1105
1106impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1107 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1108 let parsed = match &*ob.extract::<PyBackedStr>()? {
1109 "min" => RankMethod::Min,
1110 "max" => RankMethod::Max,
1111 "average" => RankMethod::Average,
1112 "dense" => RankMethod::Dense,
1113 "ordinal" => RankMethod::Ordinal,
1114 "random" => RankMethod::Random,
1115 v => {
1116 return Err(PyValueError::new_err(format!(
1117 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1118 )));
1119 },
1120 };
1121 Ok(Wrap(parsed))
1122 }
1123}
1124
1125impl<'py> FromPyObject<'py> for Wrap<Roll> {
1126 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1127 let parsed = match &*ob.extract::<PyBackedStr>()? {
1128 "raise" => Roll::Raise,
1129 "forward" => Roll::Forward,
1130 "backward" => Roll::Backward,
1131 v => {
1132 return Err(PyValueError::new_err(format!(
1133 "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1134 )));
1135 },
1136 };
1137 Ok(Wrap(parsed))
1138 }
1139}
1140
1141impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1142 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1143 let parsed = match &*ob.extract::<PyBackedStr>()? {
1144 "ns" => TimeUnit::Nanoseconds,
1145 "us" => TimeUnit::Microseconds,
1146 "ms" => TimeUnit::Milliseconds,
1147 v => {
1148 return Err(PyValueError::new_err(format!(
1149 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1150 )));
1151 },
1152 };
1153 Ok(Wrap(parsed))
1154 }
1155}
1156
1157impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1158 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1159 let parsed = match &*ob.extract::<PyBackedStr>()? {
1160 "first" => UniqueKeepStrategy::First,
1161 "last" => UniqueKeepStrategy::Last,
1162 "none" => UniqueKeepStrategy::None,
1163 "any" => UniqueKeepStrategy::Any,
1164 v => {
1165 return Err(PyValueError::new_err(format!(
1166 "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1167 )));
1168 },
1169 };
1170 Ok(Wrap(parsed))
1171 }
1172}
1173
/// Converts a Python string into a [`SearchSortedSide`].
///
/// Recognized values are `"any"`, `"left"` and `"right"`. Any other string
/// yields a `PyValueError`.
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1190
1191impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1192 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1193 let parsed = match &*ob.extract::<PyBackedStr>()? {
1194 "both" => ClosedInterval::Both,
1195 "left" => ClosedInterval::Left,
1196 "right" => ClosedInterval::Right,
1197 "none" => ClosedInterval::None,
1198 v => {
1199 return Err(PyValueError::new_err(format!(
1200 "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1201 )));
1202 },
1203 };
1204 Ok(Wrap(parsed))
1205 }
1206}
1207
1208impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1209 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1210 let parsed = match &*ob.extract::<PyBackedStr>()? {
1211 "group_to_rows" => WindowMapping::GroupsToRows,
1212 "join" => WindowMapping::Join,
1213 "explode" => WindowMapping::Explode,
1214 v => {
1215 return Err(PyValueError::new_err(format!(
1216 "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1217 )));
1218 },
1219 };
1220 Ok(Wrap(parsed))
1221 }
1222}
1223
1224impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1225 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1226 let parsed = match &*ob.extract::<PyBackedStr>()? {
1227 "1:1" => JoinValidation::OneToOne,
1228 "1:m" => JoinValidation::OneToMany,
1229 "m:m" => JoinValidation::ManyToMany,
1230 "m:1" => JoinValidation::ManyToOne,
1231 v => {
1232 return Err(PyValueError::new_err(format!(
1233 "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1234 )));
1235 },
1236 };
1237 Ok(Wrap(parsed))
1238 }
1239}
1240
1241impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1242 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1243 let parsed = match &*ob.extract::<PyBackedStr>()? {
1244 "none" => MaintainOrderJoin::None,
1245 "left" => MaintainOrderJoin::Left,
1246 "right" => MaintainOrderJoin::Right,
1247 "left_right" => MaintainOrderJoin::LeftRight,
1248 "right_left" => MaintainOrderJoin::RightLeft,
1249 v => {
1250 return Err(PyValueError::new_err(format!(
1251 "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1252 )));
1253 },
1254 };
1255 Ok(Wrap(parsed))
1256 }
1257}
1258
/// Converts a Python string into a CSV [`QuoteStyle`].
///
/// Recognized values are `"always"`, `"necessary"`, `"non_numeric"` and
/// `"never"`. Any other string yields a `PyValueError`.
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "always" => Ok(Wrap(QuoteStyle::Always)),
            "necessary" => Ok(Wrap(QuoteStyle::Necessary)),
            "non_numeric" => Ok(Wrap(QuoteStyle::NonNumeric)),
            "never" => Ok(Wrap(QuoteStyle::Never)),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        }
    }
}
1276
/// Builds [`CloudOptions`] for `uri` from untyped string key/value pairs.
///
/// Errors returned by `CloudOptions::from_untyped_config` are converted through
/// [`PyPolarsErr`] into a Python error.
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut pairs = kv.into_iter();
    // Hand over a trait object, exactly as before, rather than the concrete iterator type.
    let pairs: &mut dyn Iterator<Item = _> = &mut pairs;
    Ok(CloudOptions::from_untyped_config(uri, pairs).map_err(PyPolarsErr::from)?)
}
1286
/// Converts a Python string into a list [`SetOperation`].
///
/// Recognized values are `"union"`, `"difference"`, `"intersection"` and
/// `"symmetric_difference"`. Any other string yields a `PyValueError`.
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1304
1305impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1307 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1308 if ob.is_none() {
1309 static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1311
1312 let out = DEFAULT.get_or_try_init(ob.py(), || {
1313 let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1314 .unwrap()
1315 .getattr("ScanCastOptions")
1316 .unwrap()
1317 .call_method0("_default")
1318 .unwrap();
1319
1320 let out = Self::extract_bound(&ob)?;
1321
1322 debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1324
1325 PyResult::Ok(out)
1326 })?;
1327
1328 return Ok(out.clone());
1329 }
1330
1331 let py = ob.py();
1332
1333 let integer_upcast = match &*ob
1334 .getattr(intern!(py, "integer_cast"))?
1335 .extract::<PyBackedStr>()?
1336 {
1337 "upcast" => true,
1338 "forbid" => false,
1339 v => {
1340 return Err(PyValueError::new_err(format!(
1341 "unknown option for integer_cast: {v}"
1342 )));
1343 },
1344 };
1345
1346 let mut float_upcast = false;
1347 let mut float_downcast = false;
1348
1349 let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1350
1351 parse_multiple_options("float_cast", float_cast_object, |v| {
1352 match v {
1353 "forbid" => {},
1354 "upcast" => float_upcast = true,
1355 "downcast" => float_downcast = true,
1356 v => {
1357 return Err(PyValueError::new_err(format!(
1358 "unknown option for float_cast: {v}"
1359 )));
1360 },
1361 }
1362
1363 Ok(())
1364 })?;
1365
1366 let mut datetime_nanoseconds_downcast = false;
1367 let mut datetime_convert_timezone = false;
1368
1369 let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1370
1371 parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1372 match v {
1373 "forbid" => {},
1374 "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1375 "convert-timezone" => datetime_convert_timezone = true,
1376 v => {
1377 return Err(PyValueError::new_err(format!(
1378 "unknown option for datetime_cast: {v}"
1379 )));
1380 },
1381 };
1382
1383 Ok(())
1384 })?;
1385
1386 let missing_struct_fields = match &*ob
1387 .getattr(intern!(py, "missing_struct_fields"))?
1388 .extract::<PyBackedStr>()?
1389 {
1390 "insert" => MissingColumnsPolicy::Insert,
1391 "raise" => MissingColumnsPolicy::Raise,
1392 v => {
1393 return Err(PyValueError::new_err(format!(
1394 "unknown option for missing_struct_fields: {v}"
1395 )));
1396 },
1397 };
1398
1399 let extra_struct_fields = match &*ob
1400 .getattr(intern!(py, "extra_struct_fields"))?
1401 .extract::<PyBackedStr>()?
1402 {
1403 "ignore" => ExtraColumnsPolicy::Ignore,
1404 "raise" => ExtraColumnsPolicy::Raise,
1405 v => {
1406 return Err(PyValueError::new_err(format!(
1407 "unknown option for extra_struct_fields: {v}"
1408 )));
1409 },
1410 };
1411
1412 return Ok(Wrap(CastColumnsPolicy {
1413 integer_upcast,
1414 float_upcast,
1415 float_downcast,
1416 datetime_nanoseconds_downcast,
1417 datetime_microseconds_downcast: false,
1418 datetime_convert_timezone,
1419 missing_struct_fields,
1420 extra_struct_fields,
1421 }));
1422
1423 fn parse_multiple_options(
1424 parameter_name: &'static str,
1425 py_object: Bound<'_, PyAny>,
1426 mut parser_func: impl FnMut(&str) -> PyResult<()>,
1427 ) -> PyResult<()> {
1428 if let Ok(v) = py_object.extract::<PyBackedStr>() {
1429 parser_func(&v)?;
1430 } else if let Ok(v) = py_object.try_iter() {
1431 for v in v {
1432 parser_func(&v?.extract::<PyBackedStr>()?)?;
1433 }
1434 } else {
1435 return Err(PyValueError::new_err(format!(
1436 "unknown type for {parameter_name}: {py_object}"
1437 )));
1438 }
1439
1440 Ok(())
1441 }
1442 }
1443}
1444
1445pub(crate) fn parse_fill_null_strategy(
1446 strategy: &str,
1447 limit: FillNullLimit,
1448) -> PyResult<FillNullStrategy> {
1449 let parsed = match strategy {
1450 "forward" => FillNullStrategy::Forward(limit),
1451 "backward" => FillNullStrategy::Backward(limit),
1452 "min" => FillNullStrategy::Min,
1453 "max" => FillNullStrategy::Max,
1454 "mean" => FillNullStrategy::Mean,
1455 "zero" => FillNullStrategy::Zero,
1456 "one" => FillNullStrategy::One,
1457 e => {
1458 return Err(PyValueError::new_err(format!(
1459 "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1460 )));
1461 },
1462 };
1463 Ok(parsed)
1464}
1465
/// Maps a compression name and optional level onto a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`;
/// when it is `None` the codec receives `None` as its level.
///
/// # Errors
///
/// Returns a `PyValueError` when `compression` is not a recognized name, when
/// the level does not fit the integer type the codec's level constructor takes
/// (previously such values were silently wrapped by an `as` cast), or when that
/// constructor rejects the level.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    /// Converts `level` to the integer type the codec expects without wrapping.
    fn checked_level<T: TryFrom<i32>>(codec: &str, level: i32) -> PyResult<T> {
        T::try_from(level).map_err(|_| {
            PyValueError::new_err(format!(
                "{codec} `compression_level` is out of range, got {level}"
            ))
        })
    }

    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    checked_level::<u8>("gzip", lvl).and_then(|lvl| {
                        GzipLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    checked_level::<u32>("brotli", lvl).and_then(|lvl| {
                        BrotliLevel::try_new(lvl)
                            .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                    })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1507
1508pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1509where
1510 I: IntoIterator<Item = S>,
1511 S: AsRef<str>,
1512{
1513 container
1514 .into_iter()
1515 .map(|s| PlSmallStr::from_str(s.as_ref()))
1516 .collect()
1517}
1518
1519#[derive(Debug, Copy, Clone)]
1520pub struct PyCompatLevel(pub CompatLevel);
1521
1522impl<'py> FromPyObject<'py> for PyCompatLevel {
1523 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1524 Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1525 if let Ok(compat_level) = CompatLevel::with_level(level) {
1526 compat_level
1527 } else {
1528 return Err(PyValueError::new_err("invalid compat level"));
1529 }
1530 } else if let Ok(future) = ob.extract::<bool>() {
1531 if future {
1532 CompatLevel::newest()
1533 } else {
1534 CompatLevel::oldest()
1535 }
1536 } else {
1537 return Err(PyTypeError::new_err(
1538 "'compat_level' argument accepts int or bool",
1539 ));
1540 }))
1541 }
1542}
1543
/// Converts a Python string into a [`UnicodeForm`].
///
/// Recognized values are `"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"`. Any other
/// string yields a `PyValueError`.
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        match &*raw {
            "NFC" => Ok(Wrap(UnicodeForm::NFC)),
            "NFKC" => Ok(Wrap(UnicodeForm::NFKC)),
            "NFD" => Ok(Wrap(UnicodeForm::NFD)),
            "NFKD" => Ok(Wrap(UnicodeForm::NFKD)),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        }
    }
}
1561
/// Extracts optional parquet key/value metadata.
///
/// `None` stays `None`. A list of `(str, str)` pairs becomes static metadata;
/// any other object is kept as a Python object and handed to
/// `KeyValueMetadata::from_py_function`.
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let key_value_metadata = match Option::<Metadata>::extract_bound(ob)? {
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
            None => None,
        };
        Ok(Wrap(key_value_metadata))
    }
}
1579
1580impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1581 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1582 let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1583
1584 let tz = tz.map(|x| x.0);
1585
1586 Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1587 }
1588}
1589
1590impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1591 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1592 let parsed = match &*ob.extract::<PyBackedStr>()? {
1593 "upcast" => UpcastOrForbid::Upcast,
1594 "forbid" => UpcastOrForbid::Forbid,
1595 v => {
1596 return Err(PyValueError::new_err(format!(
1597 "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1598 )));
1599 },
1600 };
1601 Ok(Wrap(parsed))
1602 }
1603}
1604
1605impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1606 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1607 let parsed = match &*ob.extract::<PyBackedStr>()? {
1608 "ignore" => ExtraColumnsPolicy::Ignore,
1609 "raise" => ExtraColumnsPolicy::Raise,
1610 v => {
1611 return Err(PyValueError::new_err(format!(
1612 "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1613 )));
1614 },
1615 };
1616 Ok(Wrap(parsed))
1617 }
1618}
1619
1620impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1621 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1622 let parsed = match &*ob.extract::<PyBackedStr>()? {
1623 "insert" => MissingColumnsPolicy::Insert,
1624 "raise" => MissingColumnsPolicy::Raise,
1625 v => {
1626 return Err(PyValueError::new_err(format!(
1627 "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1628 )));
1629 },
1630 };
1631 Ok(Wrap(parsed))
1632 }
1633}
1634
1635impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1636 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1637 if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1638 return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1639 }
1640
1641 let parsed = match &*ob.extract::<PyBackedStr>()? {
1642 "insert" => MissingColumnsPolicyOrExpr::Insert,
1643 "raise" => MissingColumnsPolicyOrExpr::Raise,
1644 v => {
1645 return Err(PyValueError::new_err(format!(
1646 "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1647 )));
1648 },
1649 };
1650 Ok(Wrap(parsed))
1651 }
1652}
1653
1654impl<'py> FromPyObject<'py> for Wrap<ColumnMapping> {
1655 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1656 let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1657
1658 Ok(Wrap(match &*column_mapping_type {
1659 "iceberg-column-mapping" => {
1660 let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1661 ColumnMapping::Iceberg(Arc::new(
1662 IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1663 ))
1664 },
1665
1666 v => {
1667 return Err(PyValueError::new_err(format!(
1668 "unknown column mapping type: {v}"
1669 )));
1670 },
1671 }))
1672 }
1673}
1674
1675impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1676 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1677 let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1678
1679 Ok(Wrap(match &*deletion_file_type {
1680 "iceberg-position-delete" => {
1681 let dict: Bound<'_, PyDict> = ob.extract()?;
1682
1683 let mut out = PlIndexMap::new();
1684
1685 for (k, v) in dict
1686 .try_iter()?
1687 .zip(dict.call_method0("values")?.try_iter()?)
1688 {
1689 let k: usize = k?.extract()?;
1690 let v: Bound<'_, PyAny> = v?.extract()?;
1691
1692 let files = v
1693 .try_iter()?
1694 .map(|x| {
1695 x.and_then(|x| {
1696 let x: String = x.extract()?;
1697 Ok(x)
1698 })
1699 })
1700 .collect::<PyResult<Arc<[String]>>>()?;
1701
1702 if !files.is_empty() {
1703 out.insert(k, files);
1704 }
1705 }
1706
1707 DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1708 },
1709
1710 v => {
1711 return Err(PyValueError::new_err(format!(
1712 "unknown deletion file type: {v}"
1713 )));
1714 },
1715 }))
1716 }
1717}
1718
1719impl<'py> FromPyObject<'py> for Wrap<PlPath> {
1720 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1721 if let Ok(path) = ob.extract::<PyBackedStr>() {
1722 Ok(Wrap(PlPath::new(&path)))
1723 } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1724 Ok(Wrap(PlPath::Local(path.into())))
1725 } else {
1726 Err(
1727 PyTypeError::new_err(format!("PlPath cannot be formed from '{}'", ob.get_type()))
1728 .into(),
1729 )
1730 }
1731 }
1732}
1733
1734impl<'py> IntoPyObject<'py> for Wrap<PlPath> {
1735 type Target = PyString;
1736 type Output = Bound<'py, Self::Target>;
1737 type Error = Infallible;
1738
1739 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1740 self.0.to_str().into_pyobject(py)
1741 }
1742}