1use std::sync::Arc;
2
3use polars::prelude::default_values::DefaultFieldValues;
4use polars::prelude::deletion::DeletionFilesList;
5use polars::prelude::{
6 CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
7 UnifiedScanArgs,
8};
9use polars_io::{HiveOptions, RowIndex};
10use polars_utils::IdxSize;
11use polars_utils::plpath::PlPathRef;
12use polars_utils::slice_enum::Slice;
13use pyo3::types::PyAnyMethods;
14use pyo3::{Bound, FromPyObject, PyObject, PyResult};
15
16use crate::functions::parse_cloud_options;
17use crate::prelude::Wrap;
18
19pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
21
22impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
23 fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
24 Ok(Self(ob.clone()))
25 }
26}
27
28impl PyScanOptions<'_> {
29 pub fn extract_unified_scan_args(
30 &self,
31 first_path: Option<PlPathRef>,
33 ) -> PyResult<UnifiedScanArgs> {
34 #[derive(FromPyObject)]
35 struct Extract {
36 row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
37 pre_slice: Option<(i64, usize)>,
38 cast_options: Wrap<CastColumnsPolicy>,
39 extra_columns: Wrap<ExtraColumnsPolicy>,
40 missing_columns: Wrap<MissingColumnsPolicy>,
41 include_file_paths: Option<Wrap<PlSmallStr>>,
42 glob: bool,
43 column_mapping: Option<Wrap<ColumnMapping>>,
44 default_values: Option<Wrap<DefaultFieldValues>>,
45 hive_partitioning: Option<bool>,
46 hive_schema: Option<Wrap<Schema>>,
47 try_parse_hive_dates: bool,
48 rechunk: bool,
49 cache: bool,
50 storage_options: Option<Vec<(String, String)>>,
51 credential_provider: Option<PyObject>,
52 retries: usize,
53 deletion_files: Option<Wrap<DeletionFilesList>>,
54 }
55
56 let Extract {
57 row_index,
58 pre_slice,
59 cast_options,
60 extra_columns,
61 missing_columns,
62 include_file_paths,
63 column_mapping,
64 default_values,
65 glob,
66 hive_partitioning,
67 hive_schema,
68 try_parse_hive_dates,
69 rechunk,
70 cache,
71 storage_options,
72 credential_provider,
73 retries,
74 deletion_files,
75 } = self.0.extract()?;
76
77 let cloud_options =
78 parse_cloud_options(first_path, storage_options, credential_provider, retries)?;
79
80 let hive_schema = hive_schema.map(|s| Arc::new(s.0));
81
82 let row_index = row_index.map(|(name, offset)| RowIndex {
83 name: name.0,
84 offset,
85 });
86
87 let hive_options = HiveOptions {
88 enabled: hive_partitioning,
89 hive_start_idx: 0,
90 schema: hive_schema,
91 try_parse_dates: try_parse_hive_dates,
92 };
93
94 let unified_scan_args = UnifiedScanArgs {
95 schema: None,
98 cloud_options,
99 hive_options,
100 rechunk,
101 cache,
102 glob,
103 projection: None,
104 column_mapping: column_mapping.map(|x| x.0),
105 default_values: default_values
106 .map(|x| x.0)
107 .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
108 row_index,
109 pre_slice: pre_slice.map(Slice::from),
110 cast_columns_policy: cast_options.0,
111 missing_columns_policy: missing_columns.0,
112 extra_columns_policy: extra_columns.0,
113 include_file_paths: include_file_paths.map(|x| x.0),
114 deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
115 };
116
117 Ok(unified_scan_args)
118 }
119}