1use std::sync::Arc;
2
3use polars::prelude::deletion::DeletionFilesList;
4use polars::prelude::{
5 CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
6 UnifiedScanArgs,
7};
8use polars_io::{HiveOptions, RowIndex};
9use polars_utils::IdxSize;
10use polars_utils::plpath::PlPathRef;
11use polars_utils::slice_enum::Slice;
12use pyo3::types::PyAnyMethods;
13use pyo3::{Bound, FromPyObject, PyObject, PyResult};
14
15use crate::prelude::Wrap;
16
17pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
19
20impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
21 fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
22 Ok(Self(ob.clone()))
23 }
24}
25
26impl PyScanOptions<'_> {
27 pub fn extract_unified_scan_args(
28 &self,
29 first_path: Option<PlPathRef>,
31 ) -> PyResult<UnifiedScanArgs> {
32 #[derive(FromPyObject)]
33 struct Extract {
34 row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
35 pre_slice: Option<(i64, usize)>,
36 cast_options: Wrap<CastColumnsPolicy>,
37 extra_columns: Wrap<ExtraColumnsPolicy>,
38 missing_columns: Wrap<MissingColumnsPolicy>,
39 include_file_paths: Option<Wrap<PlSmallStr>>,
40 glob: bool,
41 hive_partitioning: Option<bool>,
42 hive_schema: Option<Wrap<Schema>>,
43 try_parse_hive_dates: bool,
44 rechunk: bool,
45 cache: bool,
46 storage_options: Option<Vec<(String, String)>>,
47 credential_provider: Option<PyObject>,
48 retries: usize,
49 deletion_files: Option<Wrap<DeletionFilesList>>,
50 column_mapping: Option<Wrap<ColumnMapping>>,
51 }
52
53 let Extract {
54 row_index,
55 pre_slice,
56 cast_options,
57 extra_columns,
58 missing_columns,
59 include_file_paths,
60 glob,
61 hive_partitioning,
62 hive_schema,
63 try_parse_hive_dates,
64 rechunk,
65 cache,
66 storage_options,
67 credential_provider,
68 retries,
69 deletion_files,
70 column_mapping,
71 } = self.0.extract()?;
72
73 let cloud_options = storage_options;
74
75 let cloud_options = if let Some(first_path) = first_path {
76 #[cfg(feature = "cloud")]
77 {
78 use polars_io::cloud::credential_provider::PlCredentialProvider;
79
80 use crate::prelude::parse_cloud_options;
81
82 let first_path_url = first_path.to_str();
83 let cloud_options =
84 parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
85
86 Some(
87 cloud_options
88 .with_max_retries(retries)
89 .with_credential_provider(
90 credential_provider.map(PlCredentialProvider::from_python_builder),
91 ),
92 )
93 }
94
95 #[cfg(not(feature = "cloud"))]
96 {
97 None
98 }
99 } else {
100 None
101 };
102
103 let hive_schema = hive_schema.map(|s| Arc::new(s.0));
104
105 let row_index = row_index.map(|(name, offset)| RowIndex {
106 name: name.0,
107 offset,
108 });
109
110 let hive_options = HiveOptions {
111 enabled: hive_partitioning,
112 hive_start_idx: 0,
113 schema: hive_schema,
114 try_parse_dates: try_parse_hive_dates,
115 };
116
117 let unified_scan_args = UnifiedScanArgs {
118 schema: None,
121 cloud_options,
122 hive_options,
123 rechunk,
124 cache,
125 glob,
126 projection: None,
127 row_index,
128 pre_slice: pre_slice.map(Slice::from),
129 cast_columns_policy: cast_options.0,
130 missing_columns_policy: missing_columns.0,
131 extra_columns_policy: extra_columns.0,
132 include_file_paths: include_file_paths.map(|x| x.0),
133 deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
134 column_mapping: column_mapping.map(|x| x.0),
135 };
136
137 Ok(unified_scan_args)
138 }
139}