1use std::path::PathBuf;
2use std::sync::Arc;
3
4use polars::prelude::deletion::DeletionFilesList;
5use polars::prelude::{
6 CastColumnsPolicy, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
7 UnifiedScanArgs,
8};
9use polars_io::{HiveOptions, RowIndex};
10use polars_utils::IdxSize;
11use polars_utils::slice_enum::Slice;
12use pyo3::types::PyAnyMethods;
13use pyo3::{Bound, FromPyObject, PyObject, PyResult};
14
15use crate::prelude::Wrap;
16
/// Thin wrapper around a bound Python object that carries scan options.
///
/// Nothing is read from the Python object when the wrapper is created; the
/// individual options are only pulled out when
/// [`PyScanOptions::extract_unified_scan_args`] is called.
pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
19
20impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
21 fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
22 Ok(Self(ob.clone()))
23 }
24}
25
26impl PyScanOptions<'_> {
27 pub fn extract_unified_scan_args(
28 &self,
29 first_path: Option<&PathBuf>,
31 ) -> PyResult<UnifiedScanArgs> {
32 #[derive(FromPyObject)]
33 struct Extract {
34 row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
35 pre_slice: Option<(i64, usize)>,
36 cast_options: Wrap<CastColumnsPolicy>,
37 extra_columns: Wrap<ExtraColumnsPolicy>,
38 missing_columns: Wrap<MissingColumnsPolicy>,
39 include_file_paths: Option<Wrap<PlSmallStr>>,
40 glob: bool,
41 hive_partitioning: Option<bool>,
42 hive_schema: Option<Wrap<Schema>>,
43 try_parse_hive_dates: bool,
44 rechunk: bool,
45 cache: bool,
46 storage_options: Option<Vec<(String, String)>>,
47 credential_provider: Option<PyObject>,
48 retries: usize,
49 deletion_files: Option<Wrap<DeletionFilesList>>,
50 }
51
52 let Extract {
53 row_index,
54 pre_slice,
55 cast_options,
56 extra_columns,
57 missing_columns,
58 include_file_paths,
59 glob,
60 hive_partitioning,
61 hive_schema,
62 try_parse_hive_dates,
63 rechunk,
64 cache,
65 storage_options,
66 credential_provider,
67 retries,
68 deletion_files,
69 } = self.0.extract()?;
70
71 let cloud_options = storage_options;
72
73 let cloud_options = if let Some(first_path) = first_path {
74 #[cfg(feature = "cloud")]
75 {
76 use polars_io::cloud::credential_provider::PlCredentialProvider;
77
78 use crate::prelude::parse_cloud_options;
79
80 let first_path_url = first_path.to_string_lossy();
81 let cloud_options =
82 parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
83
84 Some(
85 cloud_options
86 .with_max_retries(retries)
87 .with_credential_provider(
88 credential_provider.map(PlCredentialProvider::from_python_builder),
89 ),
90 )
91 }
92
93 #[cfg(not(feature = "cloud"))]
94 {
95 None
96 }
97 } else {
98 None
99 };
100
101 let hive_schema = hive_schema.map(|s| Arc::new(s.0));
102
103 let row_index = row_index.map(|(name, offset)| RowIndex {
104 name: name.0,
105 offset,
106 });
107
108 let hive_options = HiveOptions {
109 enabled: hive_partitioning,
110 hive_start_idx: 0,
111 schema: hive_schema,
112 try_parse_dates: try_parse_hive_dates,
113 };
114
115 let unified_scan_args = UnifiedScanArgs {
116 schema: None,
119 cloud_options,
120 hive_options,
121 rechunk,
122 cache,
123 glob,
124 projection: None,
125 row_index,
126 pre_slice: pre_slice.map(Slice::from),
127 cast_columns_policy: cast_options.0,
128 missing_columns_policy: missing_columns.0,
129 extra_columns_policy: extra_columns.0,
130 include_file_paths: include_file_paths.map(|x| x.0),
131 deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
132 };
133
134 Ok(unified_scan_args)
135 }
136}