polars_python/io/
mod.rs

1use std::path::PathBuf;
2use std::sync::Arc;
3
4use polars::prelude::deletion::DeletionFilesList;
5use polars::prelude::{
6    CastColumnsPolicy, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
7    UnifiedScanArgs,
8};
9use polars_io::{HiveOptions, RowIndex};
10use polars_utils::IdxSize;
11use polars_utils::slice_enum::Slice;
12use pyo3::types::PyAnyMethods;
13use pyo3::{Bound, FromPyObject, PyObject, PyResult};
14
15use crate::prelude::Wrap;
16
17/// Interface to `class ScanOptions` on the Python side
18pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
19
20impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
21    fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
22        Ok(Self(ob.clone()))
23    }
24}
25
26impl PyScanOptions<'_> {
27    pub fn extract_unified_scan_args(
28        &self,
29        // For cloud_options init
30        first_path: Option<&PathBuf>,
31    ) -> PyResult<UnifiedScanArgs> {
32        #[derive(FromPyObject)]
33        struct Extract {
34            row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
35            pre_slice: Option<(i64, usize)>,
36            cast_options: Wrap<CastColumnsPolicy>,
37            extra_columns: Wrap<ExtraColumnsPolicy>,
38            missing_columns: Wrap<MissingColumnsPolicy>,
39            include_file_paths: Option<Wrap<PlSmallStr>>,
40            glob: bool,
41            hive_partitioning: Option<bool>,
42            hive_schema: Option<Wrap<Schema>>,
43            try_parse_hive_dates: bool,
44            rechunk: bool,
45            cache: bool,
46            storage_options: Option<Vec<(String, String)>>,
47            credential_provider: Option<PyObject>,
48            retries: usize,
49            deletion_files: Option<Wrap<DeletionFilesList>>,
50        }
51
52        let Extract {
53            row_index,
54            pre_slice,
55            cast_options,
56            extra_columns,
57            missing_columns,
58            include_file_paths,
59            glob,
60            hive_partitioning,
61            hive_schema,
62            try_parse_hive_dates,
63            rechunk,
64            cache,
65            storage_options,
66            credential_provider,
67            retries,
68            deletion_files,
69        } = self.0.extract()?;
70
71        let cloud_options = storage_options;
72
73        let cloud_options = if let Some(first_path) = first_path {
74            #[cfg(feature = "cloud")]
75            {
76                use polars_io::cloud::credential_provider::PlCredentialProvider;
77
78                use crate::prelude::parse_cloud_options;
79
80                let first_path_url = first_path.to_string_lossy();
81                let cloud_options =
82                    parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
83
84                Some(
85                    cloud_options
86                        .with_max_retries(retries)
87                        .with_credential_provider(
88                            credential_provider.map(PlCredentialProvider::from_python_builder),
89                        ),
90                )
91            }
92
93            #[cfg(not(feature = "cloud"))]
94            {
95                None
96            }
97        } else {
98            None
99        };
100
101        let hive_schema = hive_schema.map(|s| Arc::new(s.0));
102
103        let row_index = row_index.map(|(name, offset)| RowIndex {
104            name: name.0,
105            offset,
106        });
107
108        let hive_options = HiveOptions {
109            enabled: hive_partitioning,
110            hive_start_idx: 0,
111            schema: hive_schema,
112            try_parse_dates: try_parse_hive_dates,
113        };
114
115        let unified_scan_args = UnifiedScanArgs {
116            // Schema is currently still stored inside the options per scan type, but we do eventually
117            // want to put it here instead.
118            schema: None,
119            cloud_options,
120            hive_options,
121            rechunk,
122            cache,
123            glob,
124            projection: None,
125            row_index,
126            pre_slice: pre_slice.map(Slice::from),
127            cast_columns_policy: cast_options.0,
128            missing_columns_policy: missing_columns.0,
129            extra_columns_policy: extra_columns.0,
130            include_file_paths: include_file_paths.map(|x| x.0),
131            deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
132        };
133
134        Ok(unified_scan_args)
135    }
136}