polars_python/io/
mod.rs

1use std::sync::Arc;
2
3use polars::prelude::deletion::DeletionFilesList;
4use polars::prelude::{
5    CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
6    UnifiedScanArgs,
7};
8use polars_io::{HiveOptions, RowIndex};
9use polars_utils::IdxSize;
10use polars_utils::plpath::PlPathRef;
11use polars_utils::slice_enum::Slice;
12use pyo3::types::PyAnyMethods;
13use pyo3::{Bound, FromPyObject, PyObject, PyResult};
14
15use crate::prelude::Wrap;
16
17/// Interface to `class ScanOptions` on the Python side
18pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
19
20impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
21    fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
22        Ok(Self(ob.clone()))
23    }
24}
25
26impl PyScanOptions<'_> {
27    pub fn extract_unified_scan_args(
28        &self,
29        // For cloud_options init
30        first_path: Option<PlPathRef>,
31    ) -> PyResult<UnifiedScanArgs> {
32        #[derive(FromPyObject)]
33        struct Extract {
34            row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
35            pre_slice: Option<(i64, usize)>,
36            cast_options: Wrap<CastColumnsPolicy>,
37            extra_columns: Wrap<ExtraColumnsPolicy>,
38            missing_columns: Wrap<MissingColumnsPolicy>,
39            include_file_paths: Option<Wrap<PlSmallStr>>,
40            glob: bool,
41            hive_partitioning: Option<bool>,
42            hive_schema: Option<Wrap<Schema>>,
43            try_parse_hive_dates: bool,
44            rechunk: bool,
45            cache: bool,
46            storage_options: Option<Vec<(String, String)>>,
47            credential_provider: Option<PyObject>,
48            retries: usize,
49            deletion_files: Option<Wrap<DeletionFilesList>>,
50            column_mapping: Option<Wrap<ColumnMapping>>,
51        }
52
53        let Extract {
54            row_index,
55            pre_slice,
56            cast_options,
57            extra_columns,
58            missing_columns,
59            include_file_paths,
60            glob,
61            hive_partitioning,
62            hive_schema,
63            try_parse_hive_dates,
64            rechunk,
65            cache,
66            storage_options,
67            credential_provider,
68            retries,
69            deletion_files,
70            column_mapping,
71        } = self.0.extract()?;
72
73        let cloud_options = storage_options;
74
75        let cloud_options = if let Some(first_path) = first_path {
76            #[cfg(feature = "cloud")]
77            {
78                use polars_io::cloud::credential_provider::PlCredentialProvider;
79
80                use crate::prelude::parse_cloud_options;
81
82                let first_path_url = first_path.to_str();
83                let cloud_options =
84                    parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
85
86                Some(
87                    cloud_options
88                        .with_max_retries(retries)
89                        .with_credential_provider(
90                            credential_provider.map(PlCredentialProvider::from_python_builder),
91                        ),
92                )
93            }
94
95            #[cfg(not(feature = "cloud"))]
96            {
97                None
98            }
99        } else {
100            None
101        };
102
103        let hive_schema = hive_schema.map(|s| Arc::new(s.0));
104
105        let row_index = row_index.map(|(name, offset)| RowIndex {
106            name: name.0,
107            offset,
108        });
109
110        let hive_options = HiveOptions {
111            enabled: hive_partitioning,
112            hive_start_idx: 0,
113            schema: hive_schema,
114            try_parse_dates: try_parse_hive_dates,
115        };
116
117        let unified_scan_args = UnifiedScanArgs {
118            // Schema is currently still stored inside the options per scan type, but we do eventually
119            // want to put it here instead.
120            schema: None,
121            cloud_options,
122            hive_options,
123            rechunk,
124            cache,
125            glob,
126            projection: None,
127            row_index,
128            pre_slice: pre_slice.map(Slice::from),
129            cast_columns_policy: cast_options.0,
130            missing_columns_policy: missing_columns.0,
131            extra_columns_policy: extra_columns.0,
132            include_file_paths: include_file_paths.map(|x| x.0),
133            deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
134            column_mapping: column_mapping.map(|x| x.0),
135        };
136
137        Ok(unified_scan_args)
138    }
139}