polars_python/io/
mod.rs

1use std::sync::Arc;
2
3use polars::prelude::default_values::DefaultFieldValues;
4use polars::prelude::deletion::DeletionFilesList;
5use polars::prelude::{
6    CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
7    UnifiedScanArgs,
8};
9use polars_io::{HiveOptions, RowIndex};
10use polars_utils::IdxSize;
11use polars_utils::plpath::PlPathRef;
12use polars_utils::slice_enum::Slice;
13use pyo3::types::PyAnyMethods;
14use pyo3::{Bound, FromPyObject, PyObject, PyResult};
15
16use crate::functions::parse_cloud_options;
17use crate::prelude::Wrap;
18
/// Interface to `class ScanOptions` on the Python side.
///
/// This is a thin wrapper around the bound Python object: constructing it (see the
/// `FromPyObject` impl) only clones the handle, and the individual option fields are read
/// from the object later, in `extract_unified_scan_args`.
pub struct PyScanOptions<'py>(Bound<'py, pyo3::PyAny>);
21
22impl<'py> FromPyObject<'py> for PyScanOptions<'py> {
23    fn extract_bound(ob: &Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
24        Ok(Self(ob.clone()))
25    }
26}
27
28impl PyScanOptions<'_> {
29    pub fn extract_unified_scan_args(
30        &self,
31        // For cloud_options init
32        first_path: Option<PlPathRef>,
33    ) -> PyResult<UnifiedScanArgs> {
34        #[derive(FromPyObject)]
35        struct Extract {
36            row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
37            pre_slice: Option<(i64, usize)>,
38            cast_options: Wrap<CastColumnsPolicy>,
39            extra_columns: Wrap<ExtraColumnsPolicy>,
40            missing_columns: Wrap<MissingColumnsPolicy>,
41            include_file_paths: Option<Wrap<PlSmallStr>>,
42            glob: bool,
43            column_mapping: Option<Wrap<ColumnMapping>>,
44            default_values: Option<Wrap<DefaultFieldValues>>,
45            hive_partitioning: Option<bool>,
46            hive_schema: Option<Wrap<Schema>>,
47            try_parse_hive_dates: bool,
48            rechunk: bool,
49            cache: bool,
50            storage_options: Option<Vec<(String, String)>>,
51            credential_provider: Option<PyObject>,
52            retries: usize,
53            deletion_files: Option<Wrap<DeletionFilesList>>,
54        }
55
56        let Extract {
57            row_index,
58            pre_slice,
59            cast_options,
60            extra_columns,
61            missing_columns,
62            include_file_paths,
63            column_mapping,
64            default_values,
65            glob,
66            hive_partitioning,
67            hive_schema,
68            try_parse_hive_dates,
69            rechunk,
70            cache,
71            storage_options,
72            credential_provider,
73            retries,
74            deletion_files,
75        } = self.0.extract()?;
76
77        let cloud_options =
78            parse_cloud_options(first_path, storage_options, credential_provider, retries)?;
79
80        let hive_schema = hive_schema.map(|s| Arc::new(s.0));
81
82        let row_index = row_index.map(|(name, offset)| RowIndex {
83            name: name.0,
84            offset,
85        });
86
87        let hive_options = HiveOptions {
88            enabled: hive_partitioning,
89            hive_start_idx: 0,
90            schema: hive_schema,
91            try_parse_dates: try_parse_hive_dates,
92        };
93
94        let unified_scan_args = UnifiedScanArgs {
95            // Schema is currently still stored inside the options per scan type, but we do eventually
96            // want to put it here instead.
97            schema: None,
98            cloud_options,
99            hive_options,
100            rechunk,
101            cache,
102            glob,
103            projection: None,
104            column_mapping: column_mapping.map(|x| x.0),
105            default_values: default_values
106                .map(|x| x.0)
107                .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
108            row_index,
109            pre_slice: pre_slice.map(Slice::from),
110            cast_columns_policy: cast_options.0,
111            missing_columns_policy: missing_columns.0,
112            extra_columns_policy: extra_columns.0,
113            include_file_paths: include_file_paths.map(|x| x.0),
114            deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
115        };
116
117        Ok(unified_scan_args)
118    }
119}