1pub(crate) mod any_value;
2mod categorical;
3pub(crate) mod chunked_array;
4mod datetime;
5
6use std::convert::Infallible;
7use std::fmt::{Display, Formatter};
8use std::fs::File;
9use std::hash::{Hash, Hasher};
10
11pub use categorical::PyCategories;
12#[cfg(feature = "object")]
13use polars::chunked_array::object::PolarsObjectSafe;
14use polars::frame::row::Row;
15#[cfg(feature = "avro")]
16use polars::io::avro::AvroCompression;
17use polars::prelude::ColumnMapping;
18use polars::prelude::default_values::{
19 DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
20};
21use polars::prelude::deletion::DeletionFilesList;
22use polars::series::ops::NullBehavior;
23use polars_buffer::Buffer;
24use polars_compute::decimal::dec128_verify_prec_scale;
25use polars_core::datatypes::extension::get_extension_type_or_generic;
26use polars_core::schema::iceberg::IcebergSchema;
27use polars_core::utils::arrow::array::Array;
28use polars_core::utils::materialize_dyn_int;
29use polars_lazy::prelude::*;
30#[cfg(feature = "parquet")]
31use polars_parquet::write::StatisticsOptions;
32use polars_plan::dsl::ScanSources;
33use polars_utils::compression::{BrotliLevel, GzipLevel, ZstdLevel};
34use polars_utils::pl_str::PlSmallStr;
35use polars_utils::total_ord::{TotalEq, TotalHash};
36use pyo3::basic::CompareOp;
37use pyo3::exceptions::{PyTypeError, PyValueError};
38use pyo3::intern;
39use pyo3::prelude::*;
40use pyo3::pybacked::PyBackedStr;
41use pyo3::sync::PyOnceLock;
42use pyo3::types::{IntoPyDict, PyDict, PyList, PySequence, PyString};
43
44use crate::error::PyPolarsErr;
45use crate::expr::PyExpr;
46use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
47#[cfg(feature = "object")]
48use crate::object::OBJECT_NAME;
49use crate::prelude::*;
50use crate::py_modules::{pl_series, polars};
51use crate::series::{PySeries, import_schema_pycapsule};
52use crate::utils::to_py_err;
53use crate::{PyDataFrame, PyLazyFrame};
54
/// Marker for wrapper types that [`reinterpret_vec`] may reinterpret as `Target`.
///
/// # Safety
///
/// `reinterpret_vec` rebuilds a `Vec<Self>` allocation as a `Vec<Self::Target>`
/// after only checking size and alignment at runtime.
/// NOTE(review): implementors presumably must guarantee that `Self` and `Target`
/// have identical layout and validity (e.g. via `#[repr(transparent)]`) — confirm.
pub(crate) unsafe trait Transparent {
    /// The type that `Self` is reinterpreted as.
    type Target;
}
60
// SAFETY: NOTE(review): this relies on `PySeries` having the same layout as
// `Series`. The definition of `PySeries` is not visible in this file — confirm
// it is a `#[repr(transparent)]` wrapper around `Series`.
unsafe impl Transparent for PySeries {
    type Target = Series;
}
64
// SAFETY: `Wrap<T>` is declared `#[repr(transparent)]` over its single field `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
68
// SAFETY: NOTE(review): assumes `Option<T>` and `Option<T::Target>` share a layout
// whenever `T` and `T::Target` do — confirm. `reinterpret_vec` additionally
// asserts equal size and alignment at runtime before reinterpreting.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
72
73pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
74 assert_eq!(size_of::<T>(), size_of::<T::Target>());
75 assert_eq!(align_of::<T>(), align_of::<T::Target>());
76 let len = input.len();
77 let cap = input.capacity();
78 let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
79 let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
80 let ptr: *mut T::Target = vec_ptr as *mut T::Target;
81 unsafe { Vec::from_raw_parts(ptr, len, cap) }
82}
83
84pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
85 reinterpret_vec(buf)
86}
87
/// Transparent newtype around `T`.
///
/// The impls in this file attach Python conversion traits (`FromPyObject`,
/// `IntoPyObject`) to `Wrap<...>` instantiations. Because of
/// `#[repr(transparent)]`, a `Wrap<T>` has the same layout as its inner `T`.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Wrap<T>(pub T);
91
92impl<T> Clone for Wrap<T>
93where
94 T: Clone,
95{
96 fn clone(&self) -> Self {
97 Wrap(self.0.clone())
98 }
99}
100impl<T> From<T> for Wrap<T> {
101 fn from(t: T) -> Self {
102 Wrap(t)
103 }
104}
105
106pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
108 let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
109 Ok(pydf.extract::<PyDataFrame>()?.df.into_inner())
110}
111
112pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
113 let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
114 Ok(pydf.extract::<PyLazyFrame>()?.ldf.into_inner())
115}
116
117pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
118 let s = obj.getattr(intern!(obj.py(), "_s"))?;
119 Ok(s.extract::<PySeries>()?.series.into_inner())
120}
121
122pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<'_, PyAny>> {
123 let series = pl_series(py).bind(py);
124 let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
125 constructor.call1((s,))
126}
127
128impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<PlSmallStr> {
129 type Error = PyErr;
130
131 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
132 Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
133 }
134}
135
/// Extracts the CSV `null_values` argument.
///
/// Tried in order: a single string, a sequence of strings, a sequence of
/// `(string, string)` pairs. Anything else is an error.
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullValues> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(many) = ob.extract::<Vec<PyBackedStr>>() {
            let values = many.into_iter().map(|x| (&*x).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let named = pairs
                .into_iter()
                .map(|(a, b)| ((&*a).into(), (&*b).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(named)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
161
162fn struct_dict<'a, 'py>(
163 py: Python<'py>,
164 vals: impl Iterator<Item = AnyValue<'a>>,
165 flds: &[Field],
166) -> PyResult<Bound<'py, PyDict>> {
167 let dict = PyDict::new(py);
168 flds.iter().zip(vals).try_for_each(|(fld, val)| {
169 dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
170 })?;
171 Ok(dict)
172}
173
174impl<'py> IntoPyObject<'py> for Wrap<Series> {
175 type Target = PyAny;
176 type Output = Bound<'py, Self::Target>;
177 type Error = PyErr;
178
179 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
180 to_series(py, PySeries::new(self.0))
181 }
182}
183
184impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
185 type Target = PyAny;
186 type Output = Bound<'py, Self::Target>;
187 type Error = PyErr;
188
189 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
190 let pl = polars(py).bind(py);
191
192 match &self.0 {
193 DataType::Int8 => {
194 let class = pl.getattr(intern!(py, "Int8"))?;
195 class.call0()
196 },
197 DataType::Int16 => {
198 let class = pl.getattr(intern!(py, "Int16"))?;
199 class.call0()
200 },
201 DataType::Int32 => {
202 let class = pl.getattr(intern!(py, "Int32"))?;
203 class.call0()
204 },
205 DataType::Int64 => {
206 let class = pl.getattr(intern!(py, "Int64"))?;
207 class.call0()
208 },
209 DataType::UInt8 => {
210 let class = pl.getattr(intern!(py, "UInt8"))?;
211 class.call0()
212 },
213 DataType::UInt16 => {
214 let class = pl.getattr(intern!(py, "UInt16"))?;
215 class.call0()
216 },
217 DataType::UInt32 => {
218 let class = pl.getattr(intern!(py, "UInt32"))?;
219 class.call0()
220 },
221 DataType::UInt64 => {
222 let class = pl.getattr(intern!(py, "UInt64"))?;
223 class.call0()
224 },
225 DataType::UInt128 => {
226 let class = pl.getattr(intern!(py, "UInt128"))?;
227 class.call0()
228 },
229 DataType::Int128 => {
230 let class = pl.getattr(intern!(py, "Int128"))?;
231 class.call0()
232 },
233 DataType::Float16 => {
234 let class = pl.getattr(intern!(py, "Float16"))?;
235 class.call0()
236 },
237 DataType::Float32 => {
238 let class = pl.getattr(intern!(py, "Float32"))?;
239 class.call0()
240 },
241 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
242 let class = pl.getattr(intern!(py, "Float64"))?;
243 class.call0()
244 },
245 DataType::Decimal(precision, scale) => {
246 let class = pl.getattr(intern!(py, "Decimal"))?;
247 let args = (*precision, *scale);
248 class.call1(args)
249 },
250 DataType::Boolean => {
251 let class = pl.getattr(intern!(py, "Boolean"))?;
252 class.call0()
253 },
254 DataType::String | DataType::Unknown(UnknownKind::Str) => {
255 let class = pl.getattr(intern!(py, "String"))?;
256 class.call0()
257 },
258 DataType::Binary => {
259 let class = pl.getattr(intern!(py, "Binary"))?;
260 class.call0()
261 },
262 DataType::Array(inner, size) => {
263 let class = pl.getattr(intern!(py, "Array"))?;
264 let inner = Wrap(*inner.clone());
265 let args = (&inner, *size);
266 class.call1(args)
267 },
268 DataType::List(inner) => {
269 let class = pl.getattr(intern!(py, "List"))?;
270 let inner = Wrap(*inner.clone());
271 class.call1((&inner,))
272 },
273 DataType::Date => {
274 let class = pl.getattr(intern!(py, "Date"))?;
275 class.call0()
276 },
277 DataType::Datetime(tu, tz) => {
278 let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
279 datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
280 },
281 DataType::Duration(tu) => {
282 let duration_class = pl.getattr(intern!(py, "Duration"))?;
283 duration_class.call1((tu.to_ascii(),))
284 },
285 #[cfg(feature = "object")]
286 DataType::Object(_) => {
287 let class = pl.getattr(intern!(py, "Object"))?;
288 class.call0()
289 },
290 DataType::Categorical(cats, _) => {
291 let categories_class = pl.getattr(intern!(py, "Categories"))?;
292 let categorical_class = pl.getattr(intern!(py, "Categorical"))?;
293 let categories = categories_class
294 .call_method1("_from_py_categories", (PyCategories::from(cats.clone()),))?;
295 let kwargs = [("categories", categories)];
296 categorical_class.call((), Some(&kwargs.into_py_dict(py)?))
297 },
298 DataType::Enum(_, mapping) => {
299 let categories = unsafe {
300 StringChunked::from_chunks(
301 PlSmallStr::from_static("category"),
302 vec![mapping.to_arrow(true)],
303 )
304 };
305 let class = pl.getattr(intern!(py, "Enum"))?;
306 let series = to_series(py, categories.into_series().into())?;
307 class.call1((series,))
308 },
309 DataType::Time => pl.getattr(intern!(py, "Time")).and_then(|x| x.call0()),
310 DataType::Struct(fields) => {
311 let field_class = pl.getattr(intern!(py, "Field"))?;
312 let iter = fields.iter().map(|fld| {
313 let name = fld.name().as_str();
314 let dtype = Wrap(fld.dtype().clone());
315 field_class.call1((name, &dtype)).unwrap()
316 });
317 let fields = PyList::new(py, iter)?;
318 let struct_class = pl.getattr(intern!(py, "Struct"))?;
319 struct_class.call1((fields,))
320 },
321 DataType::Null => {
322 let class = pl.getattr(intern!(py, "Null"))?;
323 class.call0()
324 },
325 DataType::Extension(typ, storage) => {
326 let py_storage = Wrap((**storage).clone()).into_pyobject(py)?;
327 let py_typ = pl
328 .getattr(intern!(py, "get_extension_type"))?
329 .call1((typ.name(),))?;
330 let class = if py_typ.is_none()
331 || py_typ.str().map(|s| s == "storage").ok() == Some(true)
332 {
333 pl.getattr(intern!(py, "Extension"))?
334 } else {
335 py_typ
336 };
337 let from_params = class.getattr(intern!(py, "ext_from_params"))?;
338 from_params.call1((typ.name(), py_storage, typ.serialize_metadata()))
339 },
340 DataType::Unknown(UnknownKind::Int(v)) => {
341 Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
342 },
343 DataType::Unknown(_) => {
344 let class = pl.getattr(intern!(py, "Unknown"))?;
345 class.call0()
346 },
347 DataType::BinaryOffset => {
348 unimplemented!()
349 },
350 }
351 }
352}
353
354impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Field> {
355 type Error = PyErr;
356
357 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
358 let py = ob.py();
359 let name = ob
360 .getattr(intern!(py, "name"))?
361 .str()?
362 .extract::<PyBackedStr>()?;
363 let dtype = ob
364 .getattr(intern!(py, "dtype"))?
365 .extract::<Wrap<DataType>>()?;
366 Ok(Wrap(Field::new((&*name).into(), dtype.0)))
367 }
368}
369
370impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DataType> {
371 type Error = PyErr;
372
373 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
374 let py = ob.py();
375 let type_name = ob.get_type().qualname()?.to_string();
376
377 let dtype = match &*type_name {
378 "DataTypeClass" => {
379 let name = ob
381 .getattr(intern!(py, "__name__"))?
382 .str()?
383 .extract::<PyBackedStr>()?;
384 match &*name {
385 "Int8" => DataType::Int8,
386 "Int16" => DataType::Int16,
387 "Int32" => DataType::Int32,
388 "Int64" => DataType::Int64,
389 "Int128" => DataType::Int128,
390 "UInt8" => DataType::UInt8,
391 "UInt16" => DataType::UInt16,
392 "UInt32" => DataType::UInt32,
393 "UInt64" => DataType::UInt64,
394 "UInt128" => DataType::UInt128,
395 "Float16" => DataType::Float16,
396 "Float32" => DataType::Float32,
397 "Float64" => DataType::Float64,
398 "Boolean" => DataType::Boolean,
399 "String" => DataType::String,
400 "Binary" => DataType::Binary,
401 "Categorical" => DataType::from_categories(Categories::global()),
402 "Enum" => DataType::from_frozen_categories(FrozenCategories::new([]).unwrap()),
403 "Date" => DataType::Date,
404 "Time" => DataType::Time,
405 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
406 "Duration" => DataType::Duration(TimeUnit::Microseconds),
407 "List" => DataType::List(Box::new(DataType::Null)),
408 "Array" => DataType::Array(Box::new(DataType::Null), 0),
409 "Struct" => DataType::Struct(vec![]),
410 "Null" => DataType::Null,
411 #[cfg(feature = "object")]
412 "Object" => DataType::Object(OBJECT_NAME),
413 "Unknown" => DataType::Unknown(Default::default()),
414 "Decimal" => {
415 return Err(PyTypeError::new_err(
416 "Decimal without precision/scale set is not a valid Polars datatype",
417 ));
418 },
419 dt => {
420 return Err(PyTypeError::new_err(format!(
421 "'{dt}' is not a Polars data type",
422 )));
423 },
424 }
425 },
426 "Int8" => DataType::Int8,
427 "Int16" => DataType::Int16,
428 "Int32" => DataType::Int32,
429 "Int64" => DataType::Int64,
430 "Int128" => DataType::Int128,
431 "UInt8" => DataType::UInt8,
432 "UInt16" => DataType::UInt16,
433 "UInt32" => DataType::UInt32,
434 "UInt64" => DataType::UInt64,
435 "UInt128" => DataType::UInt128,
436 "Float16" => DataType::Float16,
437 "Float32" => DataType::Float32,
438 "Float64" => DataType::Float64,
439 "Boolean" => DataType::Boolean,
440 "String" => DataType::String,
441 "Binary" => DataType::Binary,
442 "Categorical" => {
443 let categories = ob.getattr(intern!(py, "categories")).unwrap();
444 let py_categories = categories.getattr(intern!(py, "_categories")).unwrap();
445 let py_categories = py_categories.extract::<PyCategories>()?;
446 DataType::from_categories(py_categories.categories().clone())
447 },
448 "Enum" => {
449 let categories = ob.getattr(intern!(py, "categories")).unwrap();
450 let s = get_series(&categories.as_borrowed())?;
451 let ca = s.str().map_err(PyPolarsErr::from)?;
452 let categories = ca.downcast_iter().next().unwrap().clone();
453 assert!(!categories.has_nulls());
454 DataType::from_frozen_categories(
455 FrozenCategories::new(categories.values_iter()).unwrap(),
456 )
457 },
458 "Date" => DataType::Date,
459 "Time" => DataType::Time,
460 "Datetime" => {
461 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
462 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
463 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
464 let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
465 DataType::Datetime(
466 time_unit,
467 TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
468 )
469 },
470 "Duration" => {
471 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
472 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
473 DataType::Duration(time_unit)
474 },
475 "Decimal" => {
476 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
477 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
478 dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?;
479 DataType::Decimal(precision, scale)
480 },
481 "List" => {
482 let inner = ob.getattr(intern!(py, "inner")).unwrap();
483 let inner = inner.extract::<Wrap<DataType>>()?;
484 DataType::List(Box::new(inner.0))
485 },
486 "Array" => {
487 let inner = ob.getattr(intern!(py, "inner")).unwrap();
488 let size = ob.getattr(intern!(py, "size")).unwrap();
489 let inner = inner.extract::<Wrap<DataType>>()?;
490 let size = size.extract::<usize>()?;
491 DataType::Array(Box::new(inner.0), size)
492 },
493 "Struct" => {
494 let fields = ob.getattr(intern!(py, "fields"))?;
495 let fields = fields
496 .extract::<Vec<Wrap<Field>>>()?
497 .into_iter()
498 .map(|f| f.0)
499 .collect::<Vec<Field>>();
500 DataType::Struct(fields)
501 },
502 "Null" => DataType::Null,
503 #[cfg(feature = "object")]
504 "Object" => DataType::Object(OBJECT_NAME),
505 "Unknown" => DataType::Unknown(Default::default()),
506 dt => {
507 let base_ext = polars(py)
508 .getattr(py, intern!(py, "BaseExtension"))
509 .unwrap();
510 if ob.is_instance(base_ext.bind(py))? {
511 let ext_name_f = ob.getattr(intern!(py, "ext_name"))?;
512 let ext_metadata_f = ob.getattr(intern!(py, "ext_metadata"))?;
513 let ext_storage_f = ob.getattr(intern!(py, "ext_storage"))?;
514 let name: String = ext_name_f.call0()?.extract()?;
515 let metadata: Option<String> = ext_metadata_f.call0()?.extract()?;
516 let storage: Wrap<DataType> = ext_storage_f.call0()?.extract()?;
517 let ext_typ =
518 get_extension_type_or_generic(&name, &storage.0, metadata.as_deref());
519 return Ok(Wrap(DataType::Extension(ext_typ, Box::new(storage.0))));
520 }
521
522 return Err(PyTypeError::new_err(format!(
523 "'{dt}' is not a Polars data type",
524 )));
525 },
526 };
527 Ok(Wrap(dtype))
528 }
529}
530
531impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
532 type Target = PyString;
533 type Output = Bound<'py, Self::Target>;
534 type Error = Infallible;
535
536 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
537 self.0.to_ascii().into_pyobject(py)
538 }
539}
540
/// Extracts parquet statistics options from a Python dict of `str -> bool`.
///
/// Recognised keys are `min`, `max`, `distinct_count` and `null_count`; any
/// other key raises `TypeError`. Options not mentioned keep the value given by
/// `StatisticsOptions::empty()`.
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StatisticsOptions> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let dict = ob.cast::<PyDict>()?;
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in dict.iter() {
            // Both extractions happen before the key is validated.
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            let slot = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *slot = enabled;
        }

        Ok(Wrap(statistics))
    }
}
569
570impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Row<'static>> {
571 type Error = PyErr;
572
573 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
574 let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
575 let vals = reinterpret_vec(vals);
576 Ok(Wrap(Row(vals)))
577 }
578}
579
580impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Schema> {
581 type Error = PyErr;
582
583 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
584 let dict = ob.cast::<PyDict>()?;
585
586 Ok(Wrap(
587 dict.iter()
588 .map(|(key, val)| {
589 let key = key.extract::<PyBackedStr>()?;
590 let val = val.extract::<Wrap<DataType>>()?;
591
592 Ok(Field::new((&*key).into(), val.0))
593 })
594 .collect::<PyResult<Schema>>()?,
595 ))
596 }
597}
598
599impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ArrowSchema> {
600 type Error = PyErr;
601
602 fn extract(schema_object: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
603 let py = schema_object.py();
604
605 let schema_capsule = schema_object
606 .getattr(intern!(py, "__arrow_c_schema__"))?
607 .call0()?;
608
609 let field = import_schema_pycapsule(&schema_capsule.extract()?)?;
610
611 let ArrowDataType::Struct(fields) = field.dtype else {
612 return Err(PyValueError::new_err(format!(
613 "__arrow_c_schema__ of object did not return struct dtype: \
614 object: {:?}, dtype: {:?}",
615 schema_object, &field.dtype
616 )));
617 };
618
619 let mut schema = ArrowSchema::from_iter_check_duplicates(fields).map_err(to_py_err)?;
620
621 if let Some(md) = field.metadata {
622 *schema.metadata_mut() = Arc::unwrap_or_clone(md);
623 }
624
625 Ok(Wrap(schema))
626 }
627}
628
629impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ScanSources> {
630 type Error = PyErr;
631
632 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
633 let list = ob.cast::<PyList>()?.to_owned();
634
635 if list.is_empty() {
636 return Ok(Wrap(ScanSources::default()));
637 }
638
639 enum MutableSources {
640 Paths(Vec<PlRefPath>),
641 Files(Vec<File>),
642 Buffers(Vec<Buffer<u8>>),
643 }
644
645 let num_items = list.len();
646 let mut iter = list
647 .into_iter()
648 .map(|val| get_python_scan_source_input(val.unbind(), false));
649
650 let Some(first) = iter.next() else {
651 return Ok(Wrap(ScanSources::default()));
652 };
653
654 let mut sources = match first? {
655 PythonScanSourceInput::Path(path) => {
656 let mut sources = Vec::with_capacity(num_items);
657 sources.push(path);
658 MutableSources::Paths(sources)
659 },
660 PythonScanSourceInput::File(file) => {
661 let mut sources = Vec::with_capacity(num_items);
662 sources.push(file.into());
663 MutableSources::Files(sources)
664 },
665 PythonScanSourceInput::Buffer(buffer) => {
666 let mut sources = Vec::with_capacity(num_items);
667 sources.push(buffer);
668 MutableSources::Buffers(sources)
669 },
670 };
671
672 for source in iter {
673 match (&mut sources, source?) {
674 (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
675 (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
676 (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
677 _ => {
678 return Err(PyTypeError::new_err(
679 "Cannot combine in-memory bytes, paths and files for scan sources",
680 ));
681 },
682 }
683 }
684
685 Ok(Wrap(match sources {
686 MutableSources::Paths(i) => ScanSources::Paths(i.into()),
687 MutableSources::Files(i) => ScanSources::Files(i.into()),
688 MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
689 }))
690 }
691}
692
693impl<'py> IntoPyObject<'py> for Wrap<Schema> {
694 type Target = PyDict;
695 type Output = Bound<'py, Self::Target>;
696 type Error = PyErr;
697
698 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
699 let dict = PyDict::new(py);
700 self.0
701 .iter()
702 .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
703 Ok(dict)
704 }
705}
706
/// An owned reference to an arbitrary Python object.
///
/// `#[repr(transparent)]` gives it the same layout as the `Py<PyAny>` it holds.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The referenced Python object.
    pub inner: Py<PyAny>,
}
712
713impl Clone for ObjectValue {
714 fn clone(&self) -> Self {
715 Python::attach(|py| Self {
716 inner: self.inner.clone_ref(py),
717 })
718 }
719}
720
721impl Hash for ObjectValue {
722 fn hash<H: Hasher>(&self, state: &mut H) {
723 let h = Python::attach(|py| self.inner.bind(py).hash().expect("should be hashable"));
724 state.write_isize(h)
725 }
726}
727
// Marker impl: equality is whatever the `PartialEq` impl for `ObjectValue` reports.
// NOTE(review): that impl delegates to Python's `==`, which is not guaranteed to be
// reflexive — confirm this is acceptable for `Eq`.
impl Eq for ObjectValue {}
729
730impl PartialEq for ObjectValue {
731 fn eq(&self, other: &Self) -> bool {
732 Python::attach(|py| {
733 match self
734 .inner
735 .bind(py)
736 .rich_compare(other.inner.bind(py), CompareOp::Eq)
737 {
738 Ok(result) => result.is_truthy().unwrap(),
739 Err(_) => false,
740 }
741 })
742 }
743}
744
745impl TotalEq for ObjectValue {
746 fn tot_eq(&self, other: &Self) -> bool {
747 self == other
748 }
749}
750
751impl TotalHash for ObjectValue {
752 fn tot_hash<H>(&self, state: &mut H)
753 where
754 H: Hasher,
755 {
756 self.hash(state);
757 }
758}
759
760impl Display for ObjectValue {
761 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
762 write!(f, "{}", self.inner)
763 }
764}
765
/// Implements `PolarsObject` for `ObjectValue`; compiled only with the `object`
/// feature.
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// The type name reported for these values: the fixed string `"object"`.
    fn type_name() -> &'static str {
        "object"
    }
}
772
773impl From<Py<PyAny>> for ObjectValue {
774 fn from(p: Py<PyAny>) -> Self {
775 Self { inner: p }
776 }
777}
778
779impl<'a, 'py> FromPyObject<'a, 'py> for ObjectValue {
780 type Error = PyErr;
781
782 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
783 Ok(ObjectValue {
784 inner: ob.to_owned().unbind(),
785 })
786 }
787}
788
/// Reinterprets a `&dyn PolarsObjectSafe` as a `&ObjectValue` with an unchecked
/// pointer cast; compiled only with the `object` feature.
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        // SAFETY: NOTE(review): the cast performs no type check, so this is only
        // sound if every `dyn PolarsObjectSafe` passed in is really an
        // `ObjectValue` — confirm against callers.
        unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
    }
}
798
799impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
800 type Target = PyAny;
801 type Output = Borrowed<'a, 'py, Self::Target>;
802 type Error = std::convert::Infallible;
803
804 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
805 Ok(self.inner.bind_borrowed(py))
806 }
807}
808
809impl Default for ObjectValue {
810 fn default() -> Self {
811 Python::attach(|py| ObjectValue { inner: py.None() })
812 }
813}
814
815impl<'a, 'py, T> FromPyObject<'a, 'py> for Wrap<Vec<T>>
816where
817 T: FromPyObjectOwned<'py>,
818{
819 type Error = PyErr;
820
821 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
822 let seq = ob
823 .cast::<PySequence>()
824 .map_err(<PyErr as From<pyo3::CastError>>::from)?;
825 let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
826 for item in seq.try_iter()? {
827 v.push(item?.extract::<T>().map_err(Into::into)?);
828 }
829 Ok(Wrap(v))
830 }
831}
832
/// Parses the asof join `strategy` string; unknown values raise `ValueError`.
#[cfg(feature = "asof_join")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<AsofStrategy> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "backward" => Ok(Wrap(AsofStrategy::Backward)),
            "forward" => Ok(Wrap(AsofStrategy::Forward)),
            "nearest" => Ok(Wrap(AsofStrategy::Nearest)),
            v => Err(PyValueError::new_err(format!(
                "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
            ))),
        }
    }
}
851
852impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<InterpolationMethod> {
853 type Error = PyErr;
854
855 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
856 let parsed = match &*(ob.extract::<PyBackedStr>()?) {
857 "linear" => InterpolationMethod::Linear,
858 "nearest" => InterpolationMethod::Nearest,
859 v => {
860 return Err(PyValueError::new_err(format!(
861 "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
862 )));
863 },
864 };
865 Ok(Wrap(parsed))
866 }
867}
868
/// Parses the avro `compression` string; `"uncompressed"` maps to `None` and
/// unknown values raise `ValueError`.
#[cfg(feature = "avro")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<AvroCompression>> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "snappy" => Ok(Wrap(Some(AvroCompression::Snappy))),
            "deflate" => Ok(Wrap(Some(AvroCompression::Deflate))),
            v => Err(PyValueError::new_err(format!(
                "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
            ))),
        }
    }
}
887
888impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<StartBy> {
889 type Error = PyErr;
890
891 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
892 let parsed = match &*ob.extract::<PyBackedStr>()? {
893 "window" => StartBy::WindowBound,
894 "datapoint" => StartBy::DataPoint,
895 "monday" => StartBy::Monday,
896 "tuesday" => StartBy::Tuesday,
897 "wednesday" => StartBy::Wednesday,
898 "thursday" => StartBy::Thursday,
899 "friday" => StartBy::Friday,
900 "saturday" => StartBy::Saturday,
901 "sunday" => StartBy::Sunday,
902 v => {
903 return Err(PyValueError::new_err(format!(
904 "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
905 )));
906 },
907 };
908 Ok(Wrap(parsed))
909 }
910}
911
912impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedWindow> {
913 type Error = PyErr;
914
915 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
916 let parsed = match &*ob.extract::<PyBackedStr>()? {
917 "left" => ClosedWindow::Left,
918 "right" => ClosedWindow::Right,
919 "both" => ClosedWindow::Both,
920 "none" => ClosedWindow::None,
921 v => {
922 return Err(PyValueError::new_err(format!(
923 "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
924 )));
925 },
926 };
927 Ok(Wrap(parsed))
928 }
929}
930
931impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RoundMode> {
932 type Error = PyErr;
933
934 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
935 let parsed = match &*ob.extract::<PyBackedStr>()? {
936 "half_to_even" => RoundMode::HalfToEven,
937 "half_away_from_zero" => RoundMode::HalfAwayFromZero,
938 v => {
939 return Err(PyValueError::new_err(format!(
940 "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
941 )));
942 },
943 };
944 Ok(Wrap(parsed))
945 }
946}
947
/// Parses the csv `encoding` string; unknown values raise `ValueError`.
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CsvEncoding> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "utf8" => Ok(Wrap(CsvEncoding::Utf8)),
            "utf8-lossy" => Ok(Wrap(CsvEncoding::LossyUtf8)),
            v => Err(PyValueError::new_err(format!(
                "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
            ))),
        }
    }
}
965
/// Parses the ipc `compression` string; `"uncompressed"` maps to `None`, `"zstd"`
/// uses the default level, and unknown values raise `ValueError`.
#[cfg(feature = "ipc")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<IpcCompression>> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "uncompressed" => Ok(Wrap(None)),
            "lz4" => Ok(Wrap(Some(IpcCompression::LZ4))),
            "zstd" => Ok(Wrap(Some(IpcCompression::ZSTD(Default::default())))),
            v => Err(PyValueError::new_err(format!(
                "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
            ))),
        }
    }
}
984
985impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinType> {
986 type Error = PyErr;
987
988 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
989 let parsed = match &*ob.extract::<PyBackedStr>()? {
990 "inner" => JoinType::Inner,
991 "left" => JoinType::Left,
992 "right" => JoinType::Right,
993 "full" => JoinType::Full,
994 "semi" => JoinType::Semi,
995 "anti" => JoinType::Anti,
996 #[cfg(feature = "cross_join")]
997 "cross" => JoinType::Cross,
998 v => {
999 return Err(PyValueError::new_err(format!(
1000 "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
1001 )));
1002 },
1003 };
1004 Ok(Wrap(parsed))
1005 }
1006}
1007
1008impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Label> {
1009 type Error = PyErr;
1010
1011 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1012 let parsed = match &*ob.extract::<PyBackedStr>()? {
1013 "left" => Label::Left,
1014 "right" => Label::Right,
1015 "datapoint" => Label::DataPoint,
1016 v => {
1017 return Err(PyValueError::new_err(format!(
1018 "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
1019 )));
1020 },
1021 };
1022 Ok(Wrap(parsed))
1023 }
1024}
1025
1026impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ListToStructWidthStrategy> {
1027 type Error = PyErr;
1028
1029 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1030 let parsed = match &*ob.extract::<PyBackedStr>()? {
1031 "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
1032 "max_width" => ListToStructWidthStrategy::MaxWidth,
1033 v => {
1034 return Err(PyValueError::new_err(format!(
1035 "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
1036 )));
1037 },
1038 };
1039 Ok(Wrap(parsed))
1040 }
1041}
1042
1043impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NonExistent> {
1044 type Error = PyErr;
1045
1046 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1047 let parsed = match &*ob.extract::<PyBackedStr>()? {
1048 "null" => NonExistent::Null,
1049 "raise" => NonExistent::Raise,
1050 v => {
1051 return Err(PyValueError::new_err(format!(
1052 "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
1053 )));
1054 },
1055 };
1056 Ok(Wrap(parsed))
1057 }
1058}
1059
1060impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullBehavior> {
1061 type Error = PyErr;
1062
1063 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1064 let parsed = match &*ob.extract::<PyBackedStr>()? {
1065 "drop" => NullBehavior::Drop,
1066 "ignore" => NullBehavior::Ignore,
1067 v => {
1068 return Err(PyValueError::new_err(format!(
1069 "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
1070 )));
1071 },
1072 };
1073 Ok(Wrap(parsed))
1074 }
1075}
1076
1077impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<NullStrategy> {
1078 type Error = PyErr;
1079
1080 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1081 let parsed = match &*ob.extract::<PyBackedStr>()? {
1082 "ignore" => NullStrategy::Ignore,
1083 "propagate" => NullStrategy::Propagate,
1084 v => {
1085 return Err(PyValueError::new_err(format!(
1086 "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
1087 )));
1088 },
1089 };
1090 Ok(Wrap(parsed))
1091 }
1092}
1093
/// Converts the Python `parallel` string into a [`ParallelStrategy`].
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ParallelStrategy> {
    type Error = PyErr;

    /// Reads `ob` as a string and maps it onto the matching variant.
    ///
    /// Returns a `ValueError` listing the accepted values for any other string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "auto" => Ok(Wrap(ParallelStrategy::Auto)),
            "columns" => Ok(Wrap(ParallelStrategy::Columns)),
            "row_groups" => Ok(Wrap(ParallelStrategy::RowGroups)),
            "prefiltered" => Ok(Wrap(ParallelStrategy::Prefiltered)),
            "none" => Ok(Wrap(ParallelStrategy::None)),
            v => Err(PyValueError::new_err(format!(
                "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
            ))),
        }
    }
}
1114
1115impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<IndexOrder> {
1116 type Error = PyErr;
1117
1118 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1119 let parsed = match &*ob.extract::<PyBackedStr>()? {
1120 "fortran" => IndexOrder::Fortran,
1121 "c" => IndexOrder::C,
1122 v => {
1123 return Err(PyValueError::new_err(format!(
1124 "`order` must be one of {{'fortran', 'c'}}, got {v}",
1125 )));
1126 },
1127 };
1128 Ok(Wrap(parsed))
1129 }
1130}
1131
1132impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuantileMethod> {
1133 type Error = PyErr;
1134
1135 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1136 let parsed = match &*ob.extract::<PyBackedStr>()? {
1137 "lower" => QuantileMethod::Lower,
1138 "higher" => QuantileMethod::Higher,
1139 "nearest" => QuantileMethod::Nearest,
1140 "linear" => QuantileMethod::Linear,
1141 "midpoint" => QuantileMethod::Midpoint,
1142 "equiprobable" => QuantileMethod::Equiprobable,
1143 v => {
1144 return Err(PyValueError::new_err(format!(
1145 "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1146 )));
1147 },
1148 };
1149 Ok(Wrap(parsed))
1150 }
1151}
1152
1153impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RankMethod> {
1154 type Error = PyErr;
1155
1156 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1157 let parsed = match &*ob.extract::<PyBackedStr>()? {
1158 "min" => RankMethod::Min,
1159 "max" => RankMethod::Max,
1160 "average" => RankMethod::Average,
1161 "dense" => RankMethod::Dense,
1162 "ordinal" => RankMethod::Ordinal,
1163 "random" => RankMethod::Random,
1164 v => {
1165 return Err(PyValueError::new_err(format!(
1166 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1167 )));
1168 },
1169 };
1170 Ok(Wrap(parsed))
1171 }
1172}
1173
1174impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<RollingRankMethod> {
1175 type Error = PyErr;
1176
1177 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1178 let parsed = match &*ob.extract::<PyBackedStr>()? {
1179 "min" => RollingRankMethod::Min,
1180 "max" => RollingRankMethod::Max,
1181 "average" => RollingRankMethod::Average,
1182 "dense" => RollingRankMethod::Dense,
1183 "random" => RollingRankMethod::Random,
1184 v => {
1185 return Err(PyValueError::new_err(format!(
1186 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1187 )));
1188 },
1189 };
1190 Ok(Wrap(parsed))
1191 }
1192}
1193
1194impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Roll> {
1195 type Error = PyErr;
1196
1197 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1198 let parsed = match &*ob.extract::<PyBackedStr>()? {
1199 "raise" => Roll::Raise,
1200 "forward" => Roll::Forward,
1201 "backward" => Roll::Backward,
1202 v => {
1203 return Err(PyValueError::new_err(format!(
1204 "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1205 )));
1206 },
1207 };
1208 Ok(Wrap(parsed))
1209 }
1210}
1211
1212impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TimeUnit> {
1213 type Error = PyErr;
1214
1215 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1216 let parsed = match &*ob.extract::<PyBackedStr>()? {
1217 "ns" => TimeUnit::Nanoseconds,
1218 "us" => TimeUnit::Microseconds,
1219 "ms" => TimeUnit::Milliseconds,
1220 v => {
1221 return Err(PyValueError::new_err(format!(
1222 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1223 )));
1224 },
1225 };
1226 Ok(Wrap(parsed))
1227 }
1228}
1229
1230impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UniqueKeepStrategy> {
1231 type Error = PyErr;
1232
1233 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1234 let parsed = match &*ob.extract::<PyBackedStr>()? {
1235 "first" => UniqueKeepStrategy::First,
1236 "last" => UniqueKeepStrategy::Last,
1237 "none" => UniqueKeepStrategy::None,
1238 "any" => UniqueKeepStrategy::Any,
1239 v => {
1240 return Err(PyValueError::new_err(format!(
1241 "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1242 )));
1243 },
1244 };
1245 Ok(Wrap(parsed))
1246 }
1247}
1248
/// Converts the Python sorted `side` string into a [`SearchSortedSide`].
#[cfg(feature = "search_sorted")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SearchSortedSide> {
    type Error = PyErr;

    /// Reads `ob` as a string and maps it onto the matching variant.
    ///
    /// Returns a `ValueError` listing the accepted values for any other string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "any" => Ok(Wrap(SearchSortedSide::Any)),
            "left" => Ok(Wrap(SearchSortedSide::Left)),
            "right" => Ok(Wrap(SearchSortedSide::Right)),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        }
    }
}
1267
1268impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ClosedInterval> {
1269 type Error = PyErr;
1270
1271 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1272 let parsed = match &*ob.extract::<PyBackedStr>()? {
1273 "both" => ClosedInterval::Both,
1274 "left" => ClosedInterval::Left,
1275 "right" => ClosedInterval::Right,
1276 "none" => ClosedInterval::None,
1277 v => {
1278 return Err(PyValueError::new_err(format!(
1279 "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1280 )));
1281 },
1282 };
1283 Ok(Wrap(parsed))
1284 }
1285}
1286
1287impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<WindowMapping> {
1288 type Error = PyErr;
1289
1290 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1291 let parsed = match &*ob.extract::<PyBackedStr>()? {
1292 "group_to_rows" => WindowMapping::GroupsToRows,
1293 "join" => WindowMapping::Join,
1294 "explode" => WindowMapping::Explode,
1295 v => {
1296 return Err(PyValueError::new_err(format!(
1297 "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1298 )));
1299 },
1300 };
1301 Ok(Wrap(parsed))
1302 }
1303}
1304
1305impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<JoinValidation> {
1306 type Error = PyErr;
1307
1308 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1309 let parsed = match &*ob.extract::<PyBackedStr>()? {
1310 "1:1" => JoinValidation::OneToOne,
1311 "1:m" => JoinValidation::OneToMany,
1312 "m:m" => JoinValidation::ManyToMany,
1313 "m:1" => JoinValidation::ManyToOne,
1314 v => {
1315 return Err(PyValueError::new_err(format!(
1316 "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1317 )));
1318 },
1319 };
1320 Ok(Wrap(parsed))
1321 }
1322}
1323
1324impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MaintainOrderJoin> {
1325 type Error = PyErr;
1326
1327 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1328 let parsed = match &*ob.extract::<PyBackedStr>()? {
1329 "none" => MaintainOrderJoin::None,
1330 "left" => MaintainOrderJoin::Left,
1331 "right" => MaintainOrderJoin::Right,
1332 "left_right" => MaintainOrderJoin::LeftRight,
1333 "right_left" => MaintainOrderJoin::RightLeft,
1334 v => {
1335 return Err(PyValueError::new_err(format!(
1336 "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1337 )));
1338 },
1339 };
1340 Ok(Wrap(parsed))
1341 }
1342}
1343
/// Converts the Python `quote_style` string into a [`QuoteStyle`].
#[cfg(feature = "csv")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<QuoteStyle> {
    type Error = PyErr;

    /// Reads `ob` as a string and resolves it to a variant.
    ///
    /// Returns a `ValueError` listing the accepted values for any other string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let v: &str = &name;

        let resolved = match v {
            "always" => Some(QuoteStyle::Always),
            "necessary" => Some(QuoteStyle::Necessary),
            "non_numeric" => Some(QuoteStyle::NonNumeric),
            "never" => Some(QuoteStyle::Never),
            _ => None,
        };

        let Some(quote_style) = resolved else {
            return Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            )));
        };
        Ok(Wrap(quote_style))
    }
}
1363
/// Converts a Python set-operation name into a [`SetOperation`].
#[cfg(feature = "list_sets")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<SetOperation> {
    type Error = PyErr;

    /// Reads `ob` as a string and maps it onto the matching variant.
    ///
    /// Returns a `ValueError` listing the accepted values for any other string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        match &*name {
            "union" => Ok(Wrap(SetOperation::Union)),
            "difference" => Ok(Wrap(SetOperation::Difference)),
            "intersection" => Ok(Wrap(SetOperation::Intersection)),
            "symmetric_difference" => Ok(Wrap(SetOperation::SymmetricDifference)),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        }
    }
}
1383
1384impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<CastColumnsPolicy> {
1386 type Error = PyErr;
1387
1388 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1389 if ob.is_none() {
1390 static DEFAULT: PyOnceLock<Wrap<CastColumnsPolicy>> = PyOnceLock::new();
1392
1393 let out = DEFAULT.get_or_try_init(ob.py(), || {
1394 let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1395 .unwrap()
1396 .getattr("ScanCastOptions")
1397 .unwrap()
1398 .call_method0("_default")
1399 .unwrap();
1400
1401 let out = Self::extract(ob.as_borrowed())?;
1402
1403 debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1405
1406 PyResult::Ok(out)
1407 })?;
1408
1409 return Ok(out.clone());
1410 }
1411
1412 let py = ob.py();
1413
1414 let integer_upcast = match &*ob
1415 .getattr(intern!(py, "integer_cast"))?
1416 .extract::<PyBackedStr>()?
1417 {
1418 "upcast" => true,
1419 "forbid" => false,
1420 v => {
1421 return Err(PyValueError::new_err(format!(
1422 "unknown option for integer_cast: {v}"
1423 )));
1424 },
1425 };
1426
1427 let mut float_upcast = false;
1428 let mut float_downcast = false;
1429
1430 let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1431
1432 parse_multiple_options("float_cast", float_cast_object, |v| {
1433 match v {
1434 "forbid" => {},
1435 "upcast" => float_upcast = true,
1436 "downcast" => float_downcast = true,
1437 v => {
1438 return Err(PyValueError::new_err(format!(
1439 "unknown option for float_cast: {v}"
1440 )));
1441 },
1442 }
1443
1444 Ok(())
1445 })?;
1446
1447 let mut datetime_nanoseconds_downcast = false;
1448 let mut datetime_convert_timezone = false;
1449
1450 let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1451
1452 parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1453 match v {
1454 "forbid" => {},
1455 "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1456 "convert-timezone" => datetime_convert_timezone = true,
1457 v => {
1458 return Err(PyValueError::new_err(format!(
1459 "unknown option for datetime_cast: {v}"
1460 )));
1461 },
1462 };
1463
1464 Ok(())
1465 })?;
1466
1467 let missing_struct_fields = match &*ob
1468 .getattr(intern!(py, "missing_struct_fields"))?
1469 .extract::<PyBackedStr>()?
1470 {
1471 "insert" => MissingColumnsPolicy::Insert,
1472 "raise" => MissingColumnsPolicy::Raise,
1473 v => {
1474 return Err(PyValueError::new_err(format!(
1475 "unknown option for missing_struct_fields: {v}"
1476 )));
1477 },
1478 };
1479
1480 let extra_struct_fields = match &*ob
1481 .getattr(intern!(py, "extra_struct_fields"))?
1482 .extract::<PyBackedStr>()?
1483 {
1484 "ignore" => ExtraColumnsPolicy::Ignore,
1485 "raise" => ExtraColumnsPolicy::Raise,
1486 v => {
1487 return Err(PyValueError::new_err(format!(
1488 "unknown option for extra_struct_fields: {v}"
1489 )));
1490 },
1491 };
1492
1493 let categorical_to_string = match &*ob
1494 .getattr(intern!(py, "categorical_to_string"))?
1495 .extract::<PyBackedStr>()?
1496 {
1497 "allow" => true,
1498 "forbid" => false,
1499 v => {
1500 return Err(PyValueError::new_err(format!(
1501 "unknown option for categorical_to_string: {v}"
1502 )));
1503 },
1504 };
1505
1506 return Ok(Wrap(CastColumnsPolicy {
1507 integer_upcast,
1508 float_upcast,
1509 float_downcast,
1510 datetime_nanoseconds_downcast,
1511 datetime_microseconds_downcast: false,
1512 datetime_convert_timezone,
1513 null_upcast: true,
1514 categorical_to_string,
1515 missing_struct_fields,
1516 extra_struct_fields,
1517 }));
1518
1519 fn parse_multiple_options(
1520 parameter_name: &'static str,
1521 py_object: Bound<'_, PyAny>,
1522 mut parser_func: impl FnMut(&str) -> PyResult<()>,
1523 ) -> PyResult<()> {
1524 if let Ok(v) = py_object.extract::<PyBackedStr>() {
1525 parser_func(&v)?;
1526 } else if let Ok(v) = py_object.try_iter() {
1527 for v in v {
1528 parser_func(&v?.extract::<PyBackedStr>()?)?;
1529 }
1530 } else {
1531 return Err(PyValueError::new_err(format!(
1532 "unknown type for {parameter_name}: {py_object}"
1533 )));
1534 }
1535
1536 Ok(())
1537 }
1538 }
1539}
1540
1541pub(crate) fn parse_fill_null_strategy(
1542 strategy: &str,
1543 limit: FillNullLimit,
1544) -> PyResult<FillNullStrategy> {
1545 let parsed = match strategy {
1546 "forward" => FillNullStrategy::Forward(limit),
1547 "backward" => FillNullStrategy::Backward(limit),
1548 "min" => FillNullStrategy::Min,
1549 "max" => FillNullStrategy::Max,
1550 "mean" => FillNullStrategy::Mean,
1551 "zero" => FillNullStrategy::Zero,
1552 "one" => FillNullStrategy::One,
1553 e => {
1554 return Err(PyValueError::new_err(format!(
1555 "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1556 )));
1557 },
1558 };
1559 Ok(parsed)
1560}
1561
/// Maps a parquet compression name and optional level onto a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`; when it is
/// `None` the codec is built without an explicit level.
///
/// # Errors
///
/// Returns a `ValueError` when `compression` is not a known codec name, when the level does
/// not fit the codec's integer type, or when the codec's own level validation rejects it.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    /// Renders a level-validation error with its `Debug` output as a `ValueError`.
    fn level_err<E: std::fmt::Debug>(e: E) -> PyErr {
        PyValueError::new_err(format!("{e:?}"))
    }

    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked narrowing: an `as u8` cast would wrap (257 -> 1, -1 -> 255) and
                    // let an out-of-range level slip past validation.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid gzip `compression_level`: {lvl}"
                            ))
                        })
                        .and_then(|lvl| GzipLevel::try_new(lvl).map_err(level_err))
                })
                .transpose()?,
        ),
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: an `as u32` cast would turn a negative level into
                    // a large positive one.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "invalid brotli `compression_level`: {lvl}"
                            ))
                        })
                        .and_then(|lvl| BrotliLevel::try_new(lvl).map_err(level_err))
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| ZstdLevel::try_new(lvl).map_err(level_err))
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1602
1603pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1604where
1605 I: IntoIterator<Item = S>,
1606 S: AsRef<str>,
1607{
1608 container
1609 .into_iter()
1610 .map(|s| PlSmallStr::from_str(s.as_ref()))
1611 .collect()
1612}
1613
1614#[derive(Debug, Copy, Clone)]
1615pub struct PyCompatLevel(pub CompatLevel);
1616
1617impl<'a, 'py> FromPyObject<'a, 'py> for PyCompatLevel {
1618 type Error = PyErr;
1619
1620 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1621 Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1622 if let Ok(compat_level) = CompatLevel::with_level(level) {
1623 compat_level
1624 } else {
1625 return Err(PyValueError::new_err("invalid compat level"));
1626 }
1627 } else if let Ok(future) = ob.extract::<bool>() {
1628 if future {
1629 CompatLevel::newest()
1630 } else {
1631 CompatLevel::oldest()
1632 }
1633 } else {
1634 return Err(PyTypeError::new_err(
1635 "'compat_level' argument accepts int or bool",
1636 ));
1637 }))
1638 }
1639}
1640
/// Converts the Python `form` string into a [`UnicodeForm`].
#[cfg(feature = "string_normalize")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UnicodeForm> {
    type Error = PyErr;

    /// Reads `ob` as a string and looks up the corresponding normalization form.
    ///
    /// Returns a `ValueError` listing the accepted values for any other string.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        let v: &str = &name;

        let form = match v {
            "NFC" => Some(UnicodeForm::NFC),
            "NFKC" => Some(UnicodeForm::NFKC),
            "NFD" => Some(UnicodeForm::NFD),
            "NFKD" => Some(UnicodeForm::NFKD),
            _ => None,
        };

        form.map(Wrap).ok_or_else(|| {
            PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))
        })
    }
}
1660
/// Extracts optional parquet key-value metadata from a Python object.
#[cfg(feature = "parquet")]
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<KeyValueMetadata>> {
    type Error = PyErr;

    /// Accepts `None`, a sequence of `(str, str)` pairs, or any other Python object.
    ///
    /// Pairs become static metadata; any other object is handed to
    /// `KeyValueMetadata::from_py_function`.
    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        /// The two accepted shapes of the metadata argument.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(Py<PyAny>),
        }

        let key_value_metadata = match Option::<Metadata>::extract(ob)? {
            Some(Metadata::Static(kv)) => Some(KeyValueMetadata::from_static(kv)),
            Some(Metadata::Dynamic(func)) => Some(KeyValueMetadata::from_py_function(func)),
            None => None,
        };
        Ok(Wrap(key_value_metadata))
    }
}
1680
1681impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<Option<TimeZone>> {
1682 type Error = PyErr;
1683
1684 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1685 let tz = Option::<Wrap<PlSmallStr>>::extract(ob)?;
1686
1687 let tz = tz.map(|x| x.0);
1688
1689 Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1690 }
1691}
1692
1693impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<UpcastOrForbid> {
1694 type Error = PyErr;
1695
1696 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1697 let parsed = match &*ob.extract::<PyBackedStr>()? {
1698 "upcast" => UpcastOrForbid::Upcast,
1699 "forbid" => UpcastOrForbid::Forbid,
1700 v => {
1701 return Err(PyValueError::new_err(format!(
1702 "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1703 )));
1704 },
1705 };
1706 Ok(Wrap(parsed))
1707 }
1708}
1709
1710impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ExtraColumnsPolicy> {
1711 type Error = PyErr;
1712
1713 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1714 let parsed = match &*ob.extract::<PyBackedStr>()? {
1715 "ignore" => ExtraColumnsPolicy::Ignore,
1716 "raise" => ExtraColumnsPolicy::Raise,
1717 v => {
1718 return Err(PyValueError::new_err(format!(
1719 "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1720 )));
1721 },
1722 };
1723 Ok(Wrap(parsed))
1724 }
1725}
1726
1727impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicy> {
1728 type Error = PyErr;
1729
1730 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1731 let parsed = match &*ob.extract::<PyBackedStr>()? {
1732 "insert" => MissingColumnsPolicy::Insert,
1733 "raise" => MissingColumnsPolicy::Raise,
1734 v => {
1735 return Err(PyValueError::new_err(format!(
1736 "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1737 )));
1738 },
1739 };
1740 Ok(Wrap(parsed))
1741 }
1742}
1743
1744impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<MissingColumnsPolicyOrExpr> {
1745 type Error = PyErr;
1746
1747 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1748 if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1749 return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1750 }
1751
1752 let parsed = match &*ob.extract::<PyBackedStr>()? {
1753 "insert" => MissingColumnsPolicyOrExpr::Insert,
1754 "raise" => MissingColumnsPolicyOrExpr::Raise,
1755 v => {
1756 return Err(PyValueError::new_err(format!(
1757 "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1758 )));
1759 },
1760 };
1761 Ok(Wrap(parsed))
1762 }
1763}
1764
1765impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<ColumnMapping> {
1766 type Error = PyErr;
1767
1768 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1769 let (column_mapping_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1770
1771 Ok(Wrap(match &*column_mapping_type {
1772 "iceberg-column-mapping" => {
1773 let arrow_schema: Wrap<ArrowSchema> = ob.extract()?;
1774 ColumnMapping::Iceberg(Arc::new(
1775 IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_py_err)?,
1776 ))
1777 },
1778
1779 v => {
1780 return Err(PyValueError::new_err(format!(
1781 "unknown column mapping type: {v}"
1782 )));
1783 },
1784 }))
1785 }
1786}
1787
1788impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DeletionFilesList> {
1789 type Error = PyErr;
1790
1791 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1792 let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1793
1794 Ok(Wrap(match &*deletion_file_type {
1795 "iceberg-position-delete" => {
1796 let dict: Bound<'_, PyDict> = ob.extract()?;
1797
1798 let mut out = PlIndexMap::new();
1799
1800 for (k, v) in dict
1801 .try_iter()?
1802 .zip(dict.call_method0("values")?.try_iter()?)
1803 {
1804 let k: usize = k?.extract()?;
1805 let v: Bound<'_, PyAny> = v?.extract()?;
1806
1807 let files = v
1808 .try_iter()?
1809 .map(|x| {
1810 x.and_then(|x| {
1811 let x: String = x.extract()?;
1812 Ok(x)
1813 })
1814 })
1815 .collect::<PyResult<Arc<[String]>>>()?;
1816
1817 if !files.is_empty() {
1818 out.insert(k, files);
1819 }
1820 }
1821
1822 DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1823 },
1824
1825 v => {
1826 return Err(PyValueError::new_err(format!(
1827 "unknown deletion file type: {v}"
1828 )));
1829 },
1830 }))
1831 }
1832}
1833
1834impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<DefaultFieldValues> {
1835 type Error = PyErr;
1836
1837 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1838 let (default_values_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1839
1840 Ok(Wrap(match &*default_values_type {
1841 "iceberg" => {
1842 let dict: Bound<'_, PyDict> = ob.extract()?;
1843
1844 let mut out = PlIndexMap::new();
1845
1846 for (k, v) in dict
1847 .try_iter()?
1848 .zip(dict.call_method0("values")?.try_iter()?)
1849 {
1850 let k: u32 = k?.extract()?;
1851 let v = v?;
1852
1853 let v: Result<Column, String> = if let Ok(s) = get_series(&v) {
1854 Ok(s.into_column())
1855 } else {
1856 let err_msg: String = v.extract()?;
1857 Err(err_msg)
1858 };
1859
1860 out.insert(k, v);
1861 }
1862
1863 DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1864 out,
1865 )))
1866 },
1867
1868 v => {
1869 return Err(PyValueError::new_err(format!(
1870 "unknown deletion file type: {v}"
1871 )));
1872 },
1873 }))
1874 }
1875}
1876
1877impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<PlRefPath> {
1878 type Error = PyErr;
1879
1880 fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
1881 if let Ok(path) = ob.extract::<PyBackedStr>() {
1882 Ok(Wrap(PlRefPath::new(&*path)))
1883 } else if let Ok(path) = ob.extract::<std::path::PathBuf>() {
1884 Ok(Wrap(PlRefPath::try_from_path(&path).map_err(to_py_err)?))
1885 } else {
1886 Err(PyTypeError::new_err(format!(
1887 "PlRefPath cannot be formed from '{}'",
1888 ob.get_type()
1889 ))
1890 .into())
1891 }
1892 }
1893}
1894
1895impl<'py> IntoPyObject<'py> for Wrap<PlRefPath> {
1896 type Target = PyString;
1897 type Output = Bound<'py, Self::Target>;
1898 type Error = Infallible;
1899
1900 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
1901 self.0.as_str().into_pyobject(py)
1902 }
1903}