1pub(crate) mod any_value;
2pub(crate) mod chunked_array;
3mod datetime;
4
5use std::convert::Infallible;
6use std::fmt::{Display, Formatter};
7use std::fs::File;
8use std::hash::{Hash, Hasher};
9use std::path::PathBuf;
10
11#[cfg(feature = "object")]
12use polars::chunked_array::object::PolarsObjectSafe;
13use polars::frame::row::Row;
14#[cfg(feature = "avro")]
15use polars::io::avro::AvroCompression;
16#[cfg(feature = "cloud")]
17use polars::io::cloud::CloudOptions;
18use polars::prelude::deletion::DeletionFilesList;
19use polars::series::ops::NullBehavior;
20use polars_core::utils::arrow::array::Array;
21use polars_core::utils::arrow::types::NativeType;
22use polars_core::utils::materialize_dyn_int;
23use polars_lazy::prelude::*;
24#[cfg(feature = "parquet")]
25use polars_parquet::write::StatisticsOptions;
26use polars_plan::dsl::ScanSources;
27use polars_utils::mmap::MemSlice;
28use polars_utils::pl_str::PlSmallStr;
29use polars_utils::total_ord::{TotalEq, TotalHash};
30use pyo3::basic::CompareOp;
31use pyo3::exceptions::{PyTypeError, PyValueError};
32use pyo3::intern;
33use pyo3::prelude::*;
34use pyo3::pybacked::PyBackedStr;
35use pyo3::sync::GILOnceCell;
36use pyo3::types::{PyDict, PyList, PySequence, PyString};
37
38use crate::error::PyPolarsErr;
39use crate::expr::PyExpr;
40use crate::file::{PythonScanSourceInput, get_python_scan_source_input};
41#[cfg(feature = "object")]
42use crate::object::OBJECT_NAME;
43use crate::prelude::*;
44use crate::py_modules::{pl_series, polars};
45use crate::series::PySeries;
46use crate::utils::to_py_err;
47use crate::{PyDataFrame, PyLazyFrame};
48
/// Marker for types whose `Vec` may be reinterpreted in place as a `Vec` of
/// [`Transparent::Target`] (see `reinterpret_vec`).
///
/// # Safety
/// `reinterpret_vec` rebuilds the vector from the same raw pointer, length and
/// capacity, only checking size and alignment at runtime. Implementors must
/// therefore have exactly the same memory layout as `Target`.
pub(crate) unsafe trait Transparent {
    /// The type the implementor is reinterpreted as.
    type Target;
}
54
// NOTE(review): soundness relies on `PySeries` having the same layout as
// `Series` (presumably `#[repr(transparent)]`) — confirm at its definition.
unsafe impl Transparent for PySeries {
    type Target = Series;
}
58
// `Wrap<T>` is declared `#[repr(transparent)]` over its single field `T`.
unsafe impl<T> Transparent for Wrap<T> {
    type Target = T;
}
62
// NOTE(review): assumes `Option<T>` and `Option<T::Target>` share a layout
// whenever `T` and `T::Target` do; `reinterpret_vec` still asserts matching
// size and alignment at runtime.
unsafe impl<T: Transparent> Transparent for Option<T> {
    type Target = Option<T::Target>;
}
66
67pub(crate) fn reinterpret_vec<T: Transparent>(input: Vec<T>) -> Vec<T::Target> {
68 assert_eq!(size_of::<T>(), size_of::<T::Target>());
69 assert_eq!(align_of::<T>(), align_of::<T::Target>());
70 let len = input.len();
71 let cap = input.capacity();
72 let mut manual_drop_vec = std::mem::ManuallyDrop::new(input);
73 let vec_ptr: *mut T = manual_drop_vec.as_mut_ptr();
74 let ptr: *mut T::Target = vec_ptr as *mut T::Target;
75 unsafe { Vec::from_raw_parts(ptr, len, cap) }
76}
77
78pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
79 reinterpret_vec(buf)
80}
81
/// Transparent newtype around `T`.
///
/// This file implements the Python conversion traits on `Wrap<...>` rather
/// than on the wrapped types directly. Because of `#[repr(transparent)]` the
/// wrapper has the same layout as `T`.
#[repr(transparent)]
#[derive(PartialEq, Eq, Hash)]
pub struct Wrap<T>(pub T);
85
86impl<T> Clone for Wrap<T>
87where
88 T: Clone,
89{
90 fn clone(&self) -> Self {
91 Wrap(self.0.clone())
92 }
93}
94impl<T> From<T> for Wrap<T> {
95 fn from(t: T) -> Self {
96 Wrap(t)
97 }
98}
99
100pub(crate) fn get_df(obj: &Bound<'_, PyAny>) -> PyResult<DataFrame> {
102 let pydf = obj.getattr(intern!(obj.py(), "_df"))?;
103 Ok(pydf.extract::<PyDataFrame>()?.df)
104}
105
106pub(crate) fn get_lf(obj: &Bound<'_, PyAny>) -> PyResult<LazyFrame> {
107 let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?;
108 Ok(pydf.extract::<PyLazyFrame>()?.ldf)
109}
110
111pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
112 let s = obj.getattr(intern!(obj.py(), "_s"))?;
113 Ok(s.extract::<PySeries>()?.series)
114}
115
116pub(crate) fn to_series(py: Python<'_>, s: PySeries) -> PyResult<Bound<PyAny>> {
117 let series = pl_series(py).bind(py);
118 let constructor = series.getattr(intern!(py, "_from_pyseries"))?;
119 constructor.call1((s,))
120}
121
122impl<'py> FromPyObject<'py> for Wrap<PlSmallStr> {
123 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
124 Ok(Wrap((&*ob.extract::<PyBackedStr>()?).into()))
125 }
126}
127
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<NullValues> {
    /// Accepts, in this order of preference:
    /// a single string, a sequence of strings, or a sequence of string pairs.
    ///
    /// # Errors
    /// Returns a `PyPolarsErr::Other` when none of the three shapes matches.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if let Ok(single) = ob.extract::<PyBackedStr>() {
            return Ok(Wrap(NullValues::AllColumnsSingle((&*single).into())));
        }

        if let Ok(values) = ob.extract::<Vec<PyBackedStr>>() {
            let values = values.into_iter().map(|x| (&*x).into()).collect();
            return Ok(Wrap(NullValues::AllColumns(values)));
        }

        if let Ok(pairs) = ob.extract::<Vec<(PyBackedStr, PyBackedStr)>>() {
            let pairs = pairs
                .into_iter()
                .map(|(a, b)| ((&*a).into(), (&*b).into()))
                .collect();
            return Ok(Wrap(NullValues::Named(pairs)));
        }

        Err(PyPolarsErr::Other("could not extract value from null_values argument".into()).into())
    }
}
151
152fn struct_dict<'a, 'py>(
153 py: Python<'py>,
154 vals: impl Iterator<Item = AnyValue<'a>>,
155 flds: &[Field],
156) -> PyResult<Bound<'py, PyDict>> {
157 let dict = PyDict::new(py);
158 flds.iter().zip(vals).try_for_each(|(fld, val)| {
159 dict.set_item(fld.name().as_str(), Wrap(val).into_pyobject(py)?)
160 })?;
161 Ok(dict)
162}
163
/// Writes the decimal text of `v` into `buf` and then subtracts `ZEROS` from
/// each of the three 128-bit words, returning the number of characters that
/// `itoa` produced for `v`.
///
/// For bytes holding ASCII digits the subtraction maps `'0'..='9'` to `0..=9`.
///
/// NOTE(review): the subtraction is performed on whole `i128` words, so any
/// byte below `0x30` (a leading `'-'`, or bytes not overwritten by the text)
/// borrows from its neighbour. Presumably callers pass a non-negative value
/// and only read the first `len` bytes — confirm at the call sites.
fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize {
    // 0x30 (ASCII '0') repeated in all 16 bytes of an i128.
    const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030;
    // SAFETY: `[u128; 3]` and `[u8; 48]` are both 48 bytes, and `u8` has no
    // alignment requirement beyond 1, so viewing the words as bytes is valid.
    let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) };
    let mut buffer = itoa::Buffer::new();
    let value = buffer.format(v);
    let len = value.len();
    // Copy the formatted text to the start of the byte view; `zip` stops at
    // the shorter of the two, so bytes past `len` keep their previous content.
    for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) {
        *dst = *src
    }

    let ptr = buf.as_mut_ptr() as *mut i128;
    // SAFETY: `ptr` points at the start of the 48-byte buffer that was handed
    // in as `[u128; 3]`, so the three 16-byte words at offsets 0, 1 and 2 are
    // in bounds and suitably aligned.
    unsafe {
        *ptr -= ZEROS;
        *ptr.add(1) -= ZEROS;
        *ptr.add(2) -= ZEROS;
    }
    len
}
186
187impl<'py> IntoPyObject<'py> for &Wrap<DataType> {
188 type Target = PyAny;
189 type Output = Bound<'py, Self::Target>;
190 type Error = PyErr;
191
192 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
193 let pl = polars(py).bind(py);
194
195 match &self.0 {
196 DataType::Int8 => {
197 let class = pl.getattr(intern!(py, "Int8"))?;
198 class.call0()
199 },
200 DataType::Int16 => {
201 let class = pl.getattr(intern!(py, "Int16"))?;
202 class.call0()
203 },
204 DataType::Int32 => {
205 let class = pl.getattr(intern!(py, "Int32"))?;
206 class.call0()
207 },
208 DataType::Int64 => {
209 let class = pl.getattr(intern!(py, "Int64"))?;
210 class.call0()
211 },
212 DataType::UInt8 => {
213 let class = pl.getattr(intern!(py, "UInt8"))?;
214 class.call0()
215 },
216 DataType::UInt16 => {
217 let class = pl.getattr(intern!(py, "UInt16"))?;
218 class.call0()
219 },
220 DataType::UInt32 => {
221 let class = pl.getattr(intern!(py, "UInt32"))?;
222 class.call0()
223 },
224 DataType::UInt64 => {
225 let class = pl.getattr(intern!(py, "UInt64"))?;
226 class.call0()
227 },
228 DataType::Int128 => {
229 let class = pl.getattr(intern!(py, "Int128"))?;
230 class.call0()
231 },
232 DataType::Float32 => {
233 let class = pl.getattr(intern!(py, "Float32"))?;
234 class.call0()
235 },
236 DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
237 let class = pl.getattr(intern!(py, "Float64"))?;
238 class.call0()
239 },
240 DataType::Decimal(precision, scale) => {
241 let class = pl.getattr(intern!(py, "Decimal"))?;
242 let args = (*precision, *scale);
243 class.call1(args)
244 },
245 DataType::Boolean => {
246 let class = pl.getattr(intern!(py, "Boolean"))?;
247 class.call0()
248 },
249 DataType::String | DataType::Unknown(UnknownKind::Str) => {
250 let class = pl.getattr(intern!(py, "String"))?;
251 class.call0()
252 },
253 DataType::Binary => {
254 let class = pl.getattr(intern!(py, "Binary"))?;
255 class.call0()
256 },
257 DataType::Array(inner, size) => {
258 let class = pl.getattr(intern!(py, "Array"))?;
259 let inner = Wrap(*inner.clone());
260 let args = (&inner, *size);
261 class.call1(args)
262 },
263 DataType::List(inner) => {
264 let class = pl.getattr(intern!(py, "List"))?;
265 let inner = Wrap(*inner.clone());
266 class.call1((&inner,))
267 },
268 DataType::Date => {
269 let class = pl.getattr(intern!(py, "Date"))?;
270 class.call0()
271 },
272 DataType::Datetime(tu, tz) => {
273 let datetime_class = pl.getattr(intern!(py, "Datetime"))?;
274 datetime_class.call1((tu.to_ascii(), tz.as_deref().map(|x| x.as_str())))
275 },
276 DataType::Duration(tu) => {
277 let duration_class = pl.getattr(intern!(py, "Duration"))?;
278 duration_class.call1((tu.to_ascii(),))
279 },
280 #[cfg(feature = "object")]
281 DataType::Object(_) => {
282 let class = pl.getattr(intern!(py, "Object"))?;
283 class.call0()
284 },
285 DataType::Categorical(_, ordering) => {
286 let class = pl.getattr(intern!(py, "Categorical"))?;
287 class.call1((Wrap(*ordering),))
288 },
289 DataType::Enum(rev_map, _) => {
290 let categories = rev_map.as_ref().unwrap().get_categories();
292 let class = pl.getattr(intern!(py, "Enum"))?;
293 let s =
294 Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
295 .map_err(PyPolarsErr::from)?;
296 let series = to_series(py, s.into())?;
297 class.call1((series,))
298 },
299 DataType::Time => pl.getattr(intern!(py, "Time")),
300 DataType::Struct(fields) => {
301 let field_class = pl.getattr(intern!(py, "Field"))?;
302 let iter = fields.iter().map(|fld| {
303 let name = fld.name().as_str();
304 let dtype = Wrap(fld.dtype().clone());
305 field_class.call1((name, &dtype)).unwrap()
306 });
307 let fields = PyList::new(py, iter)?;
308 let struct_class = pl.getattr(intern!(py, "Struct"))?;
309 struct_class.call1((fields,))
310 },
311 DataType::Null => {
312 let class = pl.getattr(intern!(py, "Null"))?;
313 class.call0()
314 },
315 DataType::Unknown(UnknownKind::Int(v)) => {
316 Wrap(materialize_dyn_int(*v).dtype()).into_pyobject(py)
317 },
318 DataType::Unknown(_) => {
319 let class = pl.getattr(intern!(py, "Unknown"))?;
320 class.call0()
321 },
322 DataType::BinaryOffset => {
323 unimplemented!()
324 },
325 }
326 }
327}
328
329impl<'py> FromPyObject<'py> for Wrap<Field> {
330 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
331 let py = ob.py();
332 let name = ob
333 .getattr(intern!(py, "name"))?
334 .str()?
335 .extract::<PyBackedStr>()?;
336 let dtype = ob
337 .getattr(intern!(py, "dtype"))?
338 .extract::<Wrap<DataType>>()?;
339 Ok(Wrap(Field::new((&*name).into(), dtype.0)))
340 }
341}
342
343impl<'py> FromPyObject<'py> for Wrap<DataType> {
344 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
345 let py = ob.py();
346 let type_name = ob.get_type().qualname()?.to_string();
347
348 let dtype = match &*type_name {
349 "DataTypeClass" => {
350 let name = ob
352 .getattr(intern!(py, "__name__"))?
353 .str()?
354 .extract::<PyBackedStr>()?;
355 match &*name {
356 "Int8" => DataType::Int8,
357 "Int16" => DataType::Int16,
358 "Int32" => DataType::Int32,
359 "Int64" => DataType::Int64,
360 "Int128" => DataType::Int128,
361 "UInt8" => DataType::UInt8,
362 "UInt16" => DataType::UInt16,
363 "UInt32" => DataType::UInt32,
364 "UInt64" => DataType::UInt64,
365 "Float32" => DataType::Float32,
366 "Float64" => DataType::Float64,
367 "Boolean" => DataType::Boolean,
368 "String" => DataType::String,
369 "Binary" => DataType::Binary,
370 "Categorical" => DataType::Categorical(None, Default::default()),
371 "Enum" => DataType::Enum(None, Default::default()),
372 "Date" => DataType::Date,
373 "Time" => DataType::Time,
374 "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
375 "Duration" => DataType::Duration(TimeUnit::Microseconds),
376 "Decimal" => DataType::Decimal(None, None), "List" => DataType::List(Box::new(DataType::Null)),
378 "Array" => DataType::Array(Box::new(DataType::Null), 0),
379 "Struct" => DataType::Struct(vec![]),
380 "Null" => DataType::Null,
381 #[cfg(feature = "object")]
382 "Object" => DataType::Object(OBJECT_NAME),
383 "Unknown" => DataType::Unknown(Default::default()),
384 dt => {
385 return Err(PyTypeError::new_err(format!(
386 "'{dt}' is not a Polars data type",
387 )));
388 },
389 }
390 },
391 "Int8" => DataType::Int8,
392 "Int16" => DataType::Int16,
393 "Int32" => DataType::Int32,
394 "Int64" => DataType::Int64,
395 "Int128" => DataType::Int128,
396 "UInt8" => DataType::UInt8,
397 "UInt16" => DataType::UInt16,
398 "UInt32" => DataType::UInt32,
399 "UInt64" => DataType::UInt64,
400 "Float32" => DataType::Float32,
401 "Float64" => DataType::Float64,
402 "Boolean" => DataType::Boolean,
403 "String" => DataType::String,
404 "Binary" => DataType::Binary,
405 "Categorical" => {
406 let ordering = ob.getattr(intern!(py, "ordering")).unwrap();
407 let ordering = ordering.extract::<Wrap<CategoricalOrdering>>()?.0;
408 DataType::Categorical(None, ordering)
409 },
410 "Enum" => {
411 let categories = ob.getattr(intern!(py, "categories")).unwrap();
412 let s = get_series(&categories.as_borrowed())?;
413 let ca = s.str().map_err(PyPolarsErr::from)?;
414 let categories = ca.downcast_iter().next().unwrap().clone();
415 create_enum_dtype(categories)
416 },
417 "Date" => DataType::Date,
418 "Time" => DataType::Time,
419 "Datetime" => {
420 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
421 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
422 let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap();
423 let time_zone = time_zone.extract::<Option<PyBackedStr>>()?;
424 DataType::Datetime(
425 time_unit,
426 TimeZone::opt_try_new(time_zone.as_deref()).map_err(to_py_err)?,
427 )
428 },
429 "Duration" => {
430 let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();
431 let time_unit = time_unit.extract::<Wrap<TimeUnit>>()?.0;
432 DataType::Duration(time_unit)
433 },
434 "Decimal" => {
435 let precision = ob.getattr(intern!(py, "precision"))?.extract()?;
436 let scale = ob.getattr(intern!(py, "scale"))?.extract()?;
437 DataType::Decimal(precision, Some(scale))
438 },
439 "List" => {
440 let inner = ob.getattr(intern!(py, "inner")).unwrap();
441 let inner = inner.extract::<Wrap<DataType>>()?;
442 DataType::List(Box::new(inner.0))
443 },
444 "Array" => {
445 let inner = ob.getattr(intern!(py, "inner")).unwrap();
446 let size = ob.getattr(intern!(py, "size")).unwrap();
447 let inner = inner.extract::<Wrap<DataType>>()?;
448 let size = size.extract::<usize>()?;
449 DataType::Array(Box::new(inner.0), size)
450 },
451 "Struct" => {
452 let fields = ob.getattr(intern!(py, "fields"))?;
453 let fields = fields
454 .extract::<Vec<Wrap<Field>>>()?
455 .into_iter()
456 .map(|f| f.0)
457 .collect::<Vec<Field>>();
458 DataType::Struct(fields)
459 },
460 "Null" => DataType::Null,
461 #[cfg(feature = "object")]
462 "Object" => DataType::Object(OBJECT_NAME),
463 "Unknown" => DataType::Unknown(Default::default()),
464 dt => {
465 return Err(PyTypeError::new_err(format!(
466 "'{dt}' is not a Polars data type",
467 )));
468 },
469 };
470 Ok(Wrap(dtype))
471 }
472}
473
474impl<'py> IntoPyObject<'py> for Wrap<CategoricalOrdering> {
475 type Target = PyString;
476 type Output = Bound<'py, Self::Target>;
477 type Error = Infallible;
478
479 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
480 match self.0 {
481 CategoricalOrdering::Physical => "physical",
482 CategoricalOrdering::Lexical => "lexical",
483 }
484 .into_pyobject(py)
485 }
486}
487
488impl<'py> IntoPyObject<'py> for Wrap<TimeUnit> {
489 type Target = PyString;
490 type Output = Bound<'py, Self::Target>;
491 type Error = Infallible;
492
493 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
494 self.0.to_ascii().into_pyobject(py)
495 }
496}
497
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<StatisticsOptions> {
    /// Builds `StatisticsOptions` from a Python `dict` of `str -> bool`,
    /// starting from `StatisticsOptions::empty()`.
    ///
    /// Recognised keys: `min`, `max`, `distinct_count`, `null_count`.
    ///
    /// # Errors
    /// Fails when `ob` is not a dict, a key is not a `str`, a value is not a
    /// `bool`, or (with `TypeError`) a key is not one of the recognised names.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let dict = ob.downcast::<PyDict>()?;
        let mut statistics = StatisticsOptions::empty();

        for (key, val) in dict.iter() {
            // Both extractions happen before the key is validated.
            let key = key.extract::<PyBackedStr>()?;
            let enabled = val.extract::<bool>()?;

            let flag = match &*key {
                "min" => &mut statistics.min_value,
                "max" => &mut statistics.max_value,
                "distinct_count" => &mut statistics.distinct_count,
                "null_count" => &mut statistics.null_count,
                _ => {
                    return Err(PyTypeError::new_err(format!(
                        "'{key}' is not a valid statistic option",
                    )));
                },
            };
            *flag = enabled;
        }

        Ok(Wrap(statistics))
    }
}
524
525impl<'py> FromPyObject<'py> for Wrap<Row<'static>> {
526 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
527 let vals = ob.extract::<Vec<Wrap<AnyValue<'static>>>>()?;
528 let vals = reinterpret_vec(vals);
529 Ok(Wrap(Row(vals)))
530 }
531}
532
533impl<'py> FromPyObject<'py> for Wrap<Schema> {
534 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
535 let dict = ob.downcast::<PyDict>()?;
536
537 Ok(Wrap(
538 dict.iter()
539 .map(|(key, val)| {
540 let key = key.extract::<PyBackedStr>()?;
541 let val = val.extract::<Wrap<DataType>>()?;
542
543 Ok(Field::new((&*key).into(), val.0))
544 })
545 .collect::<PyResult<Schema>>()?,
546 ))
547 }
548}
549
550impl<'py> FromPyObject<'py> for Wrap<ScanSources> {
551 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
552 let list = ob.downcast::<PyList>()?.to_owned();
553
554 if list.is_empty() {
555 return Ok(Wrap(ScanSources::default()));
556 }
557
558 enum MutableSources {
559 Paths(Vec<PathBuf>),
560 Files(Vec<File>),
561 Buffers(Vec<MemSlice>),
562 }
563
564 let num_items = list.len();
565 let mut iter = list
566 .into_iter()
567 .map(|val| get_python_scan_source_input(val.unbind(), false));
568
569 let Some(first) = iter.next() else {
570 return Ok(Wrap(ScanSources::default()));
571 };
572
573 let mut sources = match first? {
574 PythonScanSourceInput::Path(path) => {
575 let mut sources = Vec::with_capacity(num_items);
576 sources.push(path);
577 MutableSources::Paths(sources)
578 },
579 PythonScanSourceInput::File(file) => {
580 let mut sources = Vec::with_capacity(num_items);
581 sources.push(file.into());
582 MutableSources::Files(sources)
583 },
584 PythonScanSourceInput::Buffer(buffer) => {
585 let mut sources = Vec::with_capacity(num_items);
586 sources.push(buffer);
587 MutableSources::Buffers(sources)
588 },
589 };
590
591 for source in iter {
592 match (&mut sources, source?) {
593 (MutableSources::Paths(v), PythonScanSourceInput::Path(p)) => v.push(p),
594 (MutableSources::Files(v), PythonScanSourceInput::File(f)) => v.push(f.into()),
595 (MutableSources::Buffers(v), PythonScanSourceInput::Buffer(f)) => v.push(f),
596 _ => {
597 return Err(PyTypeError::new_err(
598 "Cannot combine in-memory bytes, paths and files for scan sources",
599 ));
600 },
601 }
602 }
603
604 Ok(Wrap(match sources {
605 MutableSources::Paths(i) => ScanSources::Paths(i.into()),
606 MutableSources::Files(i) => ScanSources::Files(i.into()),
607 MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
608 }))
609 }
610}
611
612impl<'py> IntoPyObject<'py> for Wrap<&Schema> {
613 type Target = PyDict;
614 type Output = Bound<'py, Self::Target>;
615 type Error = PyErr;
616
617 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
618 let dict = PyDict::new(py);
619 self.0
620 .iter()
621 .try_for_each(|(k, v)| dict.set_item(k.as_str(), &Wrap(v.clone())))?;
622 Ok(dict)
623 }
624}
625
/// An arbitrary Python object held as a value on the Rust side.
///
/// `#[repr(transparent)]` gives the struct the same layout as its single
/// `PyObject` field.
#[derive(Debug)]
#[repr(transparent)]
pub struct ObjectValue {
    /// The owned reference to the Python object.
    pub inner: PyObject,
}
631
632impl Clone for ObjectValue {
633 fn clone(&self) -> Self {
634 Python::with_gil(|py| Self {
635 inner: self.inner.clone_ref(py),
636 })
637 }
638}
639
640impl Hash for ObjectValue {
641 fn hash<H: Hasher>(&self, state: &mut H) {
642 let h = Python::with_gil(|py| self.inner.bind(py).hash().expect("should be hashable"));
643 state.write_isize(h)
644 }
645}
646
// NOTE(review): equality is delegated to Python's `==`, so reflexivity is only
// as reliable as the wrapped object's `__eq__` — confirm this is acceptable.
impl Eq for ObjectValue {}
648
649impl PartialEq for ObjectValue {
650 fn eq(&self, other: &Self) -> bool {
651 Python::with_gil(|py| {
652 match self
653 .inner
654 .bind(py)
655 .rich_compare(other.inner.bind(py), CompareOp::Eq)
656 {
657 Ok(result) => result.is_truthy().unwrap(),
658 Err(_) => false,
659 }
660 })
661 }
662}
663
664impl TotalEq for ObjectValue {
665 fn tot_eq(&self, other: &Self) -> bool {
666 self == other
667 }
668}
669
670impl TotalHash for ObjectValue {
671 fn tot_hash<H>(&self, state: &mut H)
672 where
673 H: Hasher,
674 {
675 self.hash(state);
676 }
677}
678
679impl Display for ObjectValue {
680 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
681 write!(f, "{}", self.inner)
682 }
683}
684
#[cfg(feature = "object")]
impl PolarsObject for ObjectValue {
    /// Name reported for this object type: always `"object"`.
    fn type_name() -> &'static str {
        "object"
    }
}
691
692impl From<PyObject> for ObjectValue {
693 fn from(p: PyObject) -> Self {
694 Self { inner: p }
695 }
696}
697
698impl<'py> FromPyObject<'py> for ObjectValue {
699 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
700 Ok(ObjectValue {
701 inner: ob.to_owned().unbind(),
702 })
703 }
704}
705
#[cfg(feature = "object")]
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
    /// Reinterprets a trait-object reference as a reference to `ObjectValue`.
    ///
    /// No runtime type check is performed.
    fn from(val: &dyn PolarsObjectSafe) -> Self {
        let raw = val as *const dyn PolarsObjectSafe;
        // SAFETY: NOTE(review) — sound only if the value behind `val` really is
        // an `ObjectValue`; presumably it is the sole `PolarsObjectSafe`
        // implementor that reaches this conversion. Confirm against callers.
        unsafe { &*raw.cast::<ObjectValue>() }
    }
}
715
716impl<'a, 'py> IntoPyObject<'py> for &'a ObjectValue {
717 type Target = PyAny;
718 type Output = Borrowed<'a, 'py, Self::Target>;
719 type Error = std::convert::Infallible;
720
721 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
722 Ok(self.inner.bind_borrowed(py))
723 }
724}
725
726impl Default for ObjectValue {
727 fn default() -> Self {
728 Python::with_gil(|py| ObjectValue { inner: py.None() })
729 }
730}
731
732impl<'py, T: NativeType + FromPyObject<'py>> FromPyObject<'py> for Wrap<Vec<T>> {
733 fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
734 let seq = obj.downcast::<PySequence>()?;
735 let mut v = Vec::with_capacity(seq.len().unwrap_or(0));
736 for item in seq.try_iter()? {
737 v.push(item?.extract::<T>()?);
738 }
739 Ok(Wrap(v))
740 }
741}
742
#[cfg(feature = "asof_join")]
impl<'py> FromPyObject<'py> for Wrap<AsofStrategy> {
    /// Parses a Python string into an `AsofStrategy`.
    ///
    /// # Errors
    /// Fails when `ob` is not a `str`; raises `ValueError` for an unknown name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "backward" => AsofStrategy::Backward,
            "forward" => AsofStrategy::Forward,
            "nearest" => AsofStrategy::Nearest,
            v => {
                return Err(PyValueError::new_err(format!(
                    "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
                )));
            },
        }))
    }
}
759
760impl<'py> FromPyObject<'py> for Wrap<InterpolationMethod> {
761 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
762 let parsed = match &*(ob.extract::<PyBackedStr>()?) {
763 "linear" => InterpolationMethod::Linear,
764 "nearest" => InterpolationMethod::Nearest,
765 v => {
766 return Err(PyValueError::new_err(format!(
767 "interpolation `method` must be one of {{'linear', 'nearest'}}, got {v}",
768 )));
769 },
770 };
771 Ok(Wrap(parsed))
772 }
773}
774
#[cfg(feature = "avro")]
impl<'py> FromPyObject<'py> for Wrap<Option<AvroCompression>> {
    /// Parses a Python string into an optional `AvroCompression`;
    /// `"uncompressed"` maps to `None`.
    ///
    /// # Errors
    /// Fails when `ob` is not a `str`; raises `ValueError` for an unknown name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "uncompressed" => None,
            "snappy" => Some(AvroCompression::Snappy),
            "deflate" => Some(AvroCompression::Deflate),
            v => {
                return Err(PyValueError::new_err(format!(
                    "avro `compression` must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}",
                )));
            },
        }))
    }
}
791
792impl<'py> FromPyObject<'py> for Wrap<CategoricalOrdering> {
793 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
794 let parsed = match &*ob.extract::<PyBackedStr>()? {
795 "physical" => CategoricalOrdering::Physical,
796 "lexical" => CategoricalOrdering::Lexical,
797 v => {
798 return Err(PyValueError::new_err(format!(
799 "categorical `ordering` must be one of {{'physical', 'lexical'}}, got {v}",
800 )));
801 },
802 };
803 Ok(Wrap(parsed))
804 }
805}
806
807impl<'py> FromPyObject<'py> for Wrap<StartBy> {
808 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
809 let parsed = match &*ob.extract::<PyBackedStr>()? {
810 "window" => StartBy::WindowBound,
811 "datapoint" => StartBy::DataPoint,
812 "monday" => StartBy::Monday,
813 "tuesday" => StartBy::Tuesday,
814 "wednesday" => StartBy::Wednesday,
815 "thursday" => StartBy::Thursday,
816 "friday" => StartBy::Friday,
817 "saturday" => StartBy::Saturday,
818 "sunday" => StartBy::Sunday,
819 v => {
820 return Err(PyValueError::new_err(format!(
821 "`start_by` must be one of {{'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}}, got {v}",
822 )));
823 },
824 };
825 Ok(Wrap(parsed))
826 }
827}
828
829impl<'py> FromPyObject<'py> for Wrap<ClosedWindow> {
830 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
831 let parsed = match &*ob.extract::<PyBackedStr>()? {
832 "left" => ClosedWindow::Left,
833 "right" => ClosedWindow::Right,
834 "both" => ClosedWindow::Both,
835 "none" => ClosedWindow::None,
836 v => {
837 return Err(PyValueError::new_err(format!(
838 "`closed` must be one of {{'left', 'right', 'both', 'none'}}, got {v}",
839 )));
840 },
841 };
842 Ok(Wrap(parsed))
843 }
844}
845
846impl<'py> FromPyObject<'py> for Wrap<RoundMode> {
847 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
848 let parsed = match &*ob.extract::<PyBackedStr>()? {
849 "half_to_even" => RoundMode::HalfToEven,
850 "half_away_from_zero" => RoundMode::HalfAwayFromZero,
851 v => {
852 return Err(PyValueError::new_err(format!(
853 "`mode` must be one of {{'half_to_even', 'half_away_from_zero'}}, got {v}",
854 )));
855 },
856 };
857 Ok(Wrap(parsed))
858 }
859}
860
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<CsvEncoding> {
    /// Parses a Python string into a `CsvEncoding`.
    ///
    /// # Errors
    /// Fails when `ob` is not a `str`; raises `ValueError` for an unknown name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "utf8" => CsvEncoding::Utf8,
            "utf8-lossy" => CsvEncoding::LossyUtf8,
            v => {
                return Err(PyValueError::new_err(format!(
                    "csv `encoding` must be one of {{'utf8', 'utf8-lossy'}}, got {v}",
                )));
            },
        }))
    }
}
876
#[cfg(feature = "ipc")]
impl<'py> FromPyObject<'py> for Wrap<Option<IpcCompression>> {
    /// Parses a Python string into an optional `IpcCompression`;
    /// `"uncompressed"` maps to `None`.
    ///
    /// # Errors
    /// Fails when `ob` is not a `str`; raises `ValueError` for an unknown name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "uncompressed" => None,
            "lz4" => Some(IpcCompression::LZ4),
            "zstd" => Some(IpcCompression::ZSTD),
            v => {
                return Err(PyValueError::new_err(format!(
                    "ipc `compression` must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}",
                )));
            },
        }))
    }
}
893
894impl<'py> FromPyObject<'py> for Wrap<JoinType> {
895 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
896 let parsed = match &*ob.extract::<PyBackedStr>()? {
897 "inner" => JoinType::Inner,
898 "left" => JoinType::Left,
899 "right" => JoinType::Right,
900 "full" => JoinType::Full,
901 "semi" => JoinType::Semi,
902 "anti" => JoinType::Anti,
903 #[cfg(feature = "cross_join")]
904 "cross" => JoinType::Cross,
905 v => {
906 return Err(PyValueError::new_err(format!(
907 "`how` must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}",
908 )));
909 },
910 };
911 Ok(Wrap(parsed))
912 }
913}
914
915impl<'py> FromPyObject<'py> for Wrap<Label> {
916 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
917 let parsed = match &*ob.extract::<PyBackedStr>()? {
918 "left" => Label::Left,
919 "right" => Label::Right,
920 "datapoint" => Label::DataPoint,
921 v => {
922 return Err(PyValueError::new_err(format!(
923 "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
924 )));
925 },
926 };
927 Ok(Wrap(parsed))
928 }
929}
930
931impl<'py> FromPyObject<'py> for Wrap<ListToStructWidthStrategy> {
932 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
933 let parsed = match &*ob.extract::<PyBackedStr>()? {
934 "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
935 "max_width" => ListToStructWidthStrategy::MaxWidth,
936 v => {
937 return Err(PyValueError::new_err(format!(
938 "`n_field_strategy` must be one of {{'first_non_null', 'max_width'}}, got {v}",
939 )));
940 },
941 };
942 Ok(Wrap(parsed))
943 }
944}
945
946impl<'py> FromPyObject<'py> for Wrap<NonExistent> {
947 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
948 let parsed = match &*ob.extract::<PyBackedStr>()? {
949 "null" => NonExistent::Null,
950 "raise" => NonExistent::Raise,
951 v => {
952 return Err(PyValueError::new_err(format!(
953 "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
954 )));
955 },
956 };
957 Ok(Wrap(parsed))
958 }
959}
960
961impl<'py> FromPyObject<'py> for Wrap<NullBehavior> {
962 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
963 let parsed = match &*ob.extract::<PyBackedStr>()? {
964 "drop" => NullBehavior::Drop,
965 "ignore" => NullBehavior::Ignore,
966 v => {
967 return Err(PyValueError::new_err(format!(
968 "`null_behavior` must be one of {{'drop', 'ignore'}}, got {v}",
969 )));
970 },
971 };
972 Ok(Wrap(parsed))
973 }
974}
975
976impl<'py> FromPyObject<'py> for Wrap<NullStrategy> {
977 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
978 let parsed = match &*ob.extract::<PyBackedStr>()? {
979 "ignore" => NullStrategy::Ignore,
980 "propagate" => NullStrategy::Propagate,
981 v => {
982 return Err(PyValueError::new_err(format!(
983 "`null_strategy` must be one of {{'ignore', 'propagate'}}, got {v}",
984 )));
985 },
986 };
987 Ok(Wrap(parsed))
988 }
989}
990
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<ParallelStrategy> {
    /// Parses a Python string into a `ParallelStrategy`.
    ///
    /// # Errors
    /// Fails when `ob` is not a `str`; raises `ValueError` for an unknown name.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let name = ob.extract::<PyBackedStr>()?;
        Ok(Wrap(match &*name {
            "auto" => ParallelStrategy::Auto,
            "columns" => ParallelStrategy::Columns,
            "row_groups" => ParallelStrategy::RowGroups,
            "prefiltered" => ParallelStrategy::Prefiltered,
            "none" => ParallelStrategy::None,
            v => {
                return Err(PyValueError::new_err(format!(
                    "`parallel` must be one of {{'auto', 'columns', 'row_groups', 'prefiltered', 'none'}}, got {v}",
                )));
            },
        }))
    }
}
1009
1010impl<'py> FromPyObject<'py> for Wrap<IndexOrder> {
1011 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1012 let parsed = match &*ob.extract::<PyBackedStr>()? {
1013 "fortran" => IndexOrder::Fortran,
1014 "c" => IndexOrder::C,
1015 v => {
1016 return Err(PyValueError::new_err(format!(
1017 "`order` must be one of {{'fortran', 'c'}}, got {v}",
1018 )));
1019 },
1020 };
1021 Ok(Wrap(parsed))
1022 }
1023}
1024
1025impl<'py> FromPyObject<'py> for Wrap<QuantileMethod> {
1026 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1027 let parsed = match &*ob.extract::<PyBackedStr>()? {
1028 "lower" => QuantileMethod::Lower,
1029 "higher" => QuantileMethod::Higher,
1030 "nearest" => QuantileMethod::Nearest,
1031 "linear" => QuantileMethod::Linear,
1032 "midpoint" => QuantileMethod::Midpoint,
1033 "equiprobable" => QuantileMethod::Equiprobable,
1034 v => {
1035 return Err(PyValueError::new_err(format!(
1036 "`interpolation` must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint', 'equiprobable'}}, got {v}",
1037 )));
1038 },
1039 };
1040 Ok(Wrap(parsed))
1041 }
1042}
1043
1044impl<'py> FromPyObject<'py> for Wrap<RankMethod> {
1045 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1046 let parsed = match &*ob.extract::<PyBackedStr>()? {
1047 "min" => RankMethod::Min,
1048 "max" => RankMethod::Max,
1049 "average" => RankMethod::Average,
1050 "dense" => RankMethod::Dense,
1051 "ordinal" => RankMethod::Ordinal,
1052 "random" => RankMethod::Random,
1053 v => {
1054 return Err(PyValueError::new_err(format!(
1055 "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}",
1056 )));
1057 },
1058 };
1059 Ok(Wrap(parsed))
1060 }
1061}
1062
1063impl<'py> FromPyObject<'py> for Wrap<Roll> {
1064 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1065 let parsed = match &*ob.extract::<PyBackedStr>()? {
1066 "raise" => Roll::Raise,
1067 "forward" => Roll::Forward,
1068 "backward" => Roll::Backward,
1069 v => {
1070 return Err(PyValueError::new_err(format!(
1071 "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1072 )));
1073 },
1074 };
1075 Ok(Wrap(parsed))
1076 }
1077}
1078
1079impl<'py> FromPyObject<'py> for Wrap<TimeUnit> {
1080 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1081 let parsed = match &*ob.extract::<PyBackedStr>()? {
1082 "ns" => TimeUnit::Nanoseconds,
1083 "us" => TimeUnit::Microseconds,
1084 "ms" => TimeUnit::Milliseconds,
1085 v => {
1086 return Err(PyValueError::new_err(format!(
1087 "`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
1088 )));
1089 },
1090 };
1091 Ok(Wrap(parsed))
1092 }
1093}
1094
1095impl<'py> FromPyObject<'py> for Wrap<UniqueKeepStrategy> {
1096 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1097 let parsed = match &*ob.extract::<PyBackedStr>()? {
1098 "first" => UniqueKeepStrategy::First,
1099 "last" => UniqueKeepStrategy::Last,
1100 "none" => UniqueKeepStrategy::None,
1101 "any" => UniqueKeepStrategy::Any,
1102 v => {
1103 return Err(PyValueError::new_err(format!(
1104 "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1105 )));
1106 },
1107 };
1108 Ok(Wrap(parsed))
1109 }
1110}
1111
#[cfg(feature = "search_sorted")]
impl<'py> FromPyObject<'py> for Wrap<SearchSortedSide> {
    /// Converts a Python string (`"any"`, `"left"` or `"right"`) into a
    /// [`SearchSortedSide`].
    ///
    /// # Errors
    /// Fails if `ob` cannot be extracted as a string, or with a `ValueError`
    /// for any other string.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let side = match &*raw {
            "any" => Ok(SearchSortedSide::Any),
            "left" => Ok(SearchSortedSide::Left),
            "right" => Ok(SearchSortedSide::Right),
            v => Err(PyValueError::new_err(format!(
                "sorted `side` must be one of {{'any', 'left', 'right'}}, got {v}",
            ))),
        };
        side.map(Wrap)
    }
}
1128
1129impl<'py> FromPyObject<'py> for Wrap<ClosedInterval> {
1130 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1131 let parsed = match &*ob.extract::<PyBackedStr>()? {
1132 "both" => ClosedInterval::Both,
1133 "left" => ClosedInterval::Left,
1134 "right" => ClosedInterval::Right,
1135 "none" => ClosedInterval::None,
1136 v => {
1137 return Err(PyValueError::new_err(format!(
1138 "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1139 )));
1140 },
1141 };
1142 Ok(Wrap(parsed))
1143 }
1144}
1145
1146impl<'py> FromPyObject<'py> for Wrap<WindowMapping> {
1147 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1148 let parsed = match &*ob.extract::<PyBackedStr>()? {
1149 "group_to_rows" => WindowMapping::GroupsToRows,
1150 "join" => WindowMapping::Join,
1151 "explode" => WindowMapping::Explode,
1152 v => {
1153 return Err(PyValueError::new_err(format!(
1154 "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1155 )));
1156 },
1157 };
1158 Ok(Wrap(parsed))
1159 }
1160}
1161
1162impl<'py> FromPyObject<'py> for Wrap<JoinValidation> {
1163 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1164 let parsed = match &*ob.extract::<PyBackedStr>()? {
1165 "1:1" => JoinValidation::OneToOne,
1166 "1:m" => JoinValidation::OneToMany,
1167 "m:m" => JoinValidation::ManyToMany,
1168 "m:1" => JoinValidation::ManyToOne,
1169 v => {
1170 return Err(PyValueError::new_err(format!(
1171 "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1172 )));
1173 },
1174 };
1175 Ok(Wrap(parsed))
1176 }
1177}
1178
1179impl<'py> FromPyObject<'py> for Wrap<MaintainOrderJoin> {
1180 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1181 let parsed = match &*ob.extract::<PyBackedStr>()? {
1182 "none" => MaintainOrderJoin::None,
1183 "left" => MaintainOrderJoin::Left,
1184 "right" => MaintainOrderJoin::Right,
1185 "left_right" => MaintainOrderJoin::LeftRight,
1186 "right_left" => MaintainOrderJoin::RightLeft,
1187 v => {
1188 return Err(PyValueError::new_err(format!(
1189 "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1190 )));
1191 },
1192 };
1193 Ok(Wrap(parsed))
1194 }
1195}
1196
#[cfg(feature = "csv")]
impl<'py> FromPyObject<'py> for Wrap<QuoteStyle> {
    /// Converts a Python `quote_style` string into a [`QuoteStyle`].
    ///
    /// # Errors
    /// Fails if `ob` cannot be extracted as a string, or with a `ValueError`
    /// when the string is not one of the recognized styles.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let style = match &*raw {
            "always" => Ok(QuoteStyle::Always),
            "necessary" => Ok(QuoteStyle::Necessary),
            "non_numeric" => Ok(QuoteStyle::NonNumeric),
            "never" => Ok(QuoteStyle::Never),
            v => Err(PyValueError::new_err(format!(
                "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
            ))),
        };
        style.map(Wrap)
    }
}
1214
/// Builds [`CloudOptions`] for `uri` from untyped string key/value pairs.
///
/// # Errors
/// Any error returned by `CloudOptions::from_untyped_config` is converted into
/// a Python exception via [`PyPolarsErr`].
#[cfg(feature = "cloud")]
pub(crate) fn parse_cloud_options(
    uri: &str,
    kv: impl IntoIterator<Item = (String, String)>,
) -> PyResult<CloudOptions> {
    let mut entries = kv.into_iter();
    // Pass the pairs through a trait object, as the original implementation does.
    let dyn_entries: &mut dyn Iterator<Item = (String, String)> = &mut entries;

    match CloudOptions::from_untyped_config(uri, dyn_entries) {
        Ok(options) => Ok(options),
        Err(err) => Err(PyPolarsErr::from(err).into()),
    }
}
1224
#[cfg(feature = "list_sets")]
impl<'py> FromPyObject<'py> for Wrap<SetOperation> {
    /// Converts a Python string naming a set operation into a [`SetOperation`].
    ///
    /// # Errors
    /// Fails if `ob` cannot be extracted as a string, or with a `ValueError`
    /// when the string is not one of the recognized operations.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let operation = match &*raw {
            "union" => Ok(SetOperation::Union),
            "difference" => Ok(SetOperation::Difference),
            "intersection" => Ok(SetOperation::Intersection),
            "symmetric_difference" => Ok(SetOperation::SymmetricDifference),
            v => Err(PyValueError::new_err(format!(
                "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
            ))),
        };
        operation.map(Wrap)
    }
}
1242
1243impl<'py> FromPyObject<'py> for Wrap<CastColumnsPolicy> {
1245 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1246 if ob.is_none() {
1247 static DEFAULT: GILOnceCell<Wrap<CastColumnsPolicy>> = GILOnceCell::new();
1249
1250 let out = DEFAULT.get_or_try_init(ob.py(), || {
1251 let ob = PyModule::import(ob.py(), "polars.io.scan_options.cast_options")
1252 .unwrap()
1253 .getattr("ScanCastOptions")
1254 .unwrap()
1255 .call_method0("_default")
1256 .unwrap();
1257
1258 let out = Self::extract_bound(&ob)?;
1259
1260 debug_assert_eq!(&out.0, &CastColumnsPolicy::ERROR_ON_MISMATCH);
1262
1263 PyResult::Ok(out)
1264 })?;
1265
1266 return Ok(out.clone());
1267 }
1268
1269 let py = ob.py();
1270
1271 let integer_upcast = match &*ob
1272 .getattr(intern!(py, "integer_cast"))?
1273 .extract::<PyBackedStr>()?
1274 {
1275 "upcast" => true,
1276 "forbid" => false,
1277 v => {
1278 return Err(PyValueError::new_err(format!(
1279 "unknown option for integer_cast: {v}"
1280 )));
1281 },
1282 };
1283
1284 let mut float_upcast = false;
1285 let mut float_downcast = false;
1286
1287 let float_cast_object = ob.getattr(intern!(py, "float_cast"))?;
1288
1289 parse_multiple_options("float_cast", float_cast_object, |v| {
1290 match v {
1291 "forbid" => {},
1292 "upcast" => float_upcast = true,
1293 "downcast" => float_downcast = true,
1294 v => {
1295 return Err(PyValueError::new_err(format!(
1296 "unknown option for float_cast: {v}"
1297 )));
1298 },
1299 }
1300
1301 Ok(())
1302 })?;
1303
1304 let mut datetime_nanoseconds_downcast = false;
1305 let mut datetime_convert_timezone = false;
1306
1307 let datetime_cast_object = ob.getattr(intern!(py, "datetime_cast"))?;
1308
1309 parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1310 match v {
1311 "forbid" => {},
1312 "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1313 "convert-timezone" => datetime_convert_timezone = true,
1314 v => {
1315 return Err(PyValueError::new_err(format!(
1316 "unknown option for datetime_cast: {v}"
1317 )));
1318 },
1319 };
1320
1321 Ok(())
1322 })?;
1323
1324 let missing_struct_fields = match &*ob
1325 .getattr(intern!(py, "missing_struct_fields"))?
1326 .extract::<PyBackedStr>()?
1327 {
1328 "insert" => MissingColumnsPolicy::Insert,
1329 "raise" => MissingColumnsPolicy::Raise,
1330 v => {
1331 return Err(PyValueError::new_err(format!(
1332 "unknown option for missing_struct_fields: {v}"
1333 )));
1334 },
1335 };
1336
1337 let extra_struct_fields = match &*ob
1338 .getattr(intern!(py, "extra_struct_fields"))?
1339 .extract::<PyBackedStr>()?
1340 {
1341 "ignore" => ExtraColumnsPolicy::Ignore,
1342 "raise" => ExtraColumnsPolicy::Raise,
1343 v => {
1344 return Err(PyValueError::new_err(format!(
1345 "unknown option for extra_struct_fields: {v}"
1346 )));
1347 },
1348 };
1349
1350 return Ok(Wrap(CastColumnsPolicy {
1351 integer_upcast,
1352 float_upcast,
1353 float_downcast,
1354 datetime_nanoseconds_downcast,
1355 datetime_microseconds_downcast: false,
1356 datetime_convert_timezone,
1357 missing_struct_fields,
1358 extra_struct_fields,
1359 }));
1360
1361 fn parse_multiple_options(
1362 parameter_name: &'static str,
1363 py_object: Bound<'_, PyAny>,
1364 mut parser_func: impl FnMut(&str) -> PyResult<()>,
1365 ) -> PyResult<()> {
1366 if let Ok(v) = py_object.extract::<PyBackedStr>() {
1367 parser_func(&v)?;
1368 } else if let Ok(v) = py_object.try_iter() {
1369 for v in v {
1370 parser_func(&v?.extract::<PyBackedStr>()?)?;
1371 }
1372 } else {
1373 return Err(PyValueError::new_err(format!(
1374 "unknown type for {parameter_name}: {py_object}"
1375 )));
1376 }
1377
1378 Ok(())
1379 }
1380 }
1381}
1382
1383pub(crate) fn parse_fill_null_strategy(
1384 strategy: &str,
1385 limit: FillNullLimit,
1386) -> PyResult<FillNullStrategy> {
1387 let parsed = match strategy {
1388 "forward" => FillNullStrategy::Forward(limit),
1389 "backward" => FillNullStrategy::Backward(limit),
1390 "min" => FillNullStrategy::Min,
1391 "max" => FillNullStrategy::Max,
1392 "mean" => FillNullStrategy::Mean,
1393 "zero" => FillNullStrategy::Zero,
1394 "one" => FillNullStrategy::One,
1395 e => {
1396 return Err(PyValueError::new_err(format!(
1397 "`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1398 )));
1399 },
1400 };
1401 Ok(parsed)
1402}
1403
/// Maps a compression codec name and optional level to a [`ParquetCompression`].
///
/// `compression_level` is only consulted for `"gzip"`, `"brotli"` and `"zstd"`;
/// `None` leaves the level unset for those codecs.
///
/// # Errors
/// Returns a `ValueError` when `compression` is not a recognized codec name,
/// when `compression_level` does not fit the integer type the codec's level
/// uses, or when the codec's level constructor rejects the value.
#[cfg(feature = "parquet")]
pub(crate) fn parse_parquet_compression(
    compression: &str,
    compression_level: Option<i32>,
) -> PyResult<ParquetCompression> {
    let parsed = match compression {
        "uncompressed" => ParquetCompression::Uncompressed,
        "snappy" => ParquetCompression::Snappy,
        "gzip" => ParquetCompression::Gzip(
            compression_level
                .map(|lvl| {
                    // Checked conversion: an `as u8` cast would silently wrap
                    // (e.g. 256 -> 0) and accept an invalid level.
                    u8::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "`compression_level` {lvl} is out of range for gzip compression"
                            ))
                        })
                        .and_then(|lvl| {
                            GzipLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lzo" => ParquetCompression::Lzo,
        "brotli" => ParquetCompression::Brotli(
            compression_level
                .map(|lvl| {
                    // Checked conversion: negative levels must not wrap around.
                    u32::try_from(lvl)
                        .map_err(|_| {
                            PyValueError::new_err(format!(
                                "`compression_level` {lvl} is out of range for brotli compression"
                            ))
                        })
                        .and_then(|lvl| {
                            BrotliLevel::try_new(lvl)
                                .map_err(|e| PyValueError::new_err(format!("{e:?}")))
                        })
                })
                .transpose()?,
        ),
        "lz4" => ParquetCompression::Lz4Raw,
        "zstd" => ParquetCompression::Zstd(
            compression_level
                .map(|lvl| {
                    ZstdLevel::try_new(lvl).map_err(|e| PyValueError::new_err(format!("{e:?}")))
                })
                .transpose()?,
        ),
        e => {
            return Err(PyValueError::new_err(format!(
                "parquet `compression` must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}",
            )));
        },
    };
    Ok(parsed)
}
1445
1446pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1447where
1448 I: IntoIterator<Item = S>,
1449 S: AsRef<str>,
1450{
1451 container
1452 .into_iter()
1453 .map(|s| PlSmallStr::from_str(s.as_ref()))
1454 .collect()
1455}
1456
1457#[derive(Debug, Copy, Clone)]
1458pub struct PyCompatLevel(pub CompatLevel);
1459
1460impl<'py> FromPyObject<'py> for PyCompatLevel {
1461 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1462 Ok(PyCompatLevel(if let Ok(level) = ob.extract::<u16>() {
1463 if let Ok(compat_level) = CompatLevel::with_level(level) {
1464 compat_level
1465 } else {
1466 return Err(PyValueError::new_err("invalid compat level"));
1467 }
1468 } else if let Ok(future) = ob.extract::<bool>() {
1469 if future {
1470 CompatLevel::newest()
1471 } else {
1472 CompatLevel::oldest()
1473 }
1474 } else {
1475 return Err(PyTypeError::new_err(
1476 "'compat_level' argument accepts int or bool",
1477 ));
1478 }))
1479 }
1480}
1481
#[cfg(feature = "string_normalize")]
impl<'py> FromPyObject<'py> for Wrap<UnicodeForm> {
    /// Converts a Python string naming a Unicode normalization form into a
    /// [`UnicodeForm`].
    ///
    /// # Errors
    /// Fails if `ob` cannot be extracted as a string, or with a `ValueError`
    /// when the string is not one of the recognized forms.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.extract::<PyBackedStr>()?;
        let form = match &*raw {
            "NFC" => Ok(UnicodeForm::NFC),
            "NFKC" => Ok(UnicodeForm::NFKC),
            "NFD" => Ok(UnicodeForm::NFD),
            "NFKD" => Ok(UnicodeForm::NFKD),
            v => Err(PyValueError::new_err(format!(
                "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
            ))),
        };
        form.map(Wrap)
    }
}
1499
#[cfg(feature = "parquet")]
impl<'py> FromPyObject<'py> for Wrap<Option<KeyValueMetadata>> {
    /// Extracts optional key/value metadata from a Python object.
    ///
    /// `None` yields `None`. A value that extracts as a list of
    /// `(str, str)` pairs is passed to `KeyValueMetadata::from_static`; any
    /// other object is handed to `KeyValueMetadata::from_py_function`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Variants are tried in declaration order by the derived extractor.
        #[derive(FromPyObject)]
        enum Metadata {
            Static(Vec<(String, String)>),
            Dynamic(PyObject),
        }

        let converted = match ob.extract::<Option<Metadata>>()? {
            None => None,
            Some(Metadata::Static(pairs)) => Some(KeyValueMetadata::from_static(pairs)),
            Some(Metadata::Dynamic(callable)) => {
                Some(KeyValueMetadata::from_py_function(callable))
            },
        };
        Ok(Wrap(converted))
    }
}
1517
1518impl<'py> FromPyObject<'py> for Wrap<Option<TimeZone>> {
1519 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1520 let tz = Option::<Wrap<PlSmallStr>>::extract_bound(ob)?;
1521
1522 let tz = tz.map(|x| x.0);
1523
1524 Ok(Wrap(TimeZone::opt_try_new(tz).map_err(to_py_err)?))
1525 }
1526}
1527
1528impl<'py> FromPyObject<'py> for Wrap<UpcastOrForbid> {
1529 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1530 let parsed = match &*ob.extract::<PyBackedStr>()? {
1531 "upcast" => UpcastOrForbid::Upcast,
1532 "forbid" => UpcastOrForbid::Forbid,
1533 v => {
1534 return Err(PyValueError::new_err(format!(
1535 "cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
1536 )));
1537 },
1538 };
1539 Ok(Wrap(parsed))
1540 }
1541}
1542
1543impl<'py> FromPyObject<'py> for Wrap<ExtraColumnsPolicy> {
1544 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1545 let parsed = match &*ob.extract::<PyBackedStr>()? {
1546 "ignore" => ExtraColumnsPolicy::Ignore,
1547 "raise" => ExtraColumnsPolicy::Raise,
1548 v => {
1549 return Err(PyValueError::new_err(format!(
1550 "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1551 )));
1552 },
1553 };
1554 Ok(Wrap(parsed))
1555 }
1556}
1557
1558impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicy> {
1559 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1560 let parsed = match &*ob.extract::<PyBackedStr>()? {
1561 "insert" => MissingColumnsPolicy::Insert,
1562 "raise" => MissingColumnsPolicy::Raise,
1563 v => {
1564 return Err(PyValueError::new_err(format!(
1565 "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1566 )));
1567 },
1568 };
1569 Ok(Wrap(parsed))
1570 }
1571}
1572
1573impl<'py> FromPyObject<'py> for Wrap<MissingColumnsPolicyOrExpr> {
1574 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1575 if let Ok(pyexpr) = ob.extract::<PyExpr>() {
1576 return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(pyexpr.inner)));
1577 }
1578
1579 let parsed = match &*ob.extract::<PyBackedStr>()? {
1580 "insert" => MissingColumnsPolicyOrExpr::Insert,
1581 "raise" => MissingColumnsPolicyOrExpr::Raise,
1582 v => {
1583 return Err(PyValueError::new_err(format!(
1584 "missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
1585 )));
1586 },
1587 };
1588 Ok(Wrap(parsed))
1589 }
1590}
1591
1592impl<'py> FromPyObject<'py> for Wrap<DeletionFilesList> {
1593 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
1594 let (deletion_file_type, ob): (PyBackedStr, Bound<'_, PyAny>) = ob.extract()?;
1595
1596 Ok(Wrap(match &*deletion_file_type {
1597 "iceberg-position-delete" => {
1598 let dict: Bound<'_, PyDict> = ob.extract()?;
1599
1600 let mut out = PlIndexMap::new();
1601
1602 for (k, v) in dict
1603 .try_iter()?
1604 .zip(dict.call_method0("values")?.try_iter()?)
1605 {
1606 let k: usize = k?.extract()?;
1607 let v: Bound<'_, PyAny> = v?.extract()?;
1608
1609 let files = v
1610 .try_iter()?
1611 .map(|x| {
1612 x.and_then(|x| {
1613 let x: String = x.extract()?;
1614 Ok(x)
1615 })
1616 })
1617 .collect::<PyResult<Arc<[String]>>>()?;
1618
1619 if !files.is_empty() {
1620 out.insert(k, files);
1621 }
1622 }
1623
1624 DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1625 },
1626
1627 v => {
1628 return Err(PyValueError::new_err(format!(
1629 "unknown deletion file type: {v}"
1630 )));
1631 },
1632 }))
1633 }
1634}