use std::sync::Arc;

use polars::prelude::default_values::DefaultFieldValues;
use polars::prelude::deletion::DeletionFilesList;
use polars::prelude::{
    CastColumnsPolicy, CloudScheme, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy,
    PlSmallStr, Schema, TableStatistics, UnifiedScanArgs,
};
use polars_io::{HiveOptions, RowIndex};
use polars_utils::IdxSize;
use polars_utils::slice_enum::Slice;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;

use crate::PyDataFrame;
use crate::io::cloud_options::OptPyCloudOptions;
use crate::prelude::Wrap;

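/// Borrowed handle to the Python-side scan options object. No attribute is
/// read at construction time; field extraction happens in
/// [`PyScanOptions::extract_unified_scan_args`].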
pub struct PyScanOptions<'py>(Bound<'py, PyAny>);

impl<'a, 'py> FromPyObject<'a, 'py> for PyScanOptions<'py> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        Ok(Self(ob.to_owned()))
    }
}

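/// Builds [`TableStatistics`] from a Python object by reading its `_df`
/// attribute (expected to wrap a [`PyDataFrame`]) and taking ownership of the
/// inner `DataFrame`.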
impl<'a, 'py> FromPyObject<'a, 'py> for Wrap<TableStatistics> {
    type Error = PyErr;

    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> PyResult<Self> {
        let py = ob.py();
        let attr = ob.getattr(intern!(py, "_df"))?;
        Ok(Wrap(TableStatistics(Arc::new(
            PyDataFrame::extract(attr.as_borrowed())?.df.into_inner(),
        ))))
    }
}

impl PyScanOptions<'_> {
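    /// Convert the wrapped Python options object into [`UnifiedScanArgs`].
    ///
    /// Illustrative call site (hypothetical; actual callers are the scan
    /// constructors that receive a `PyScanOptions` from Python):
    ///
    /// ```ignore
    /// let options: PyScanOptions = ob.extract()?;
    /// let args = options.extract_unified_scan_args(None)?;
    /// ```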
    pub fn extract_unified_scan_args(
        &self,
        cloud_scheme: Option<CloudScheme>,
    ) -> PyResult<UnifiedScanArgs> {
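        // Mirror of the Python-side options object: the derived `FromPyObject`
        // extracts each field via attribute lookup, so the field names here
        // must match the attribute names on the Python object.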
        #[derive(FromPyObject)]
        struct Extract<'a> {
            row_index: Option<(Wrap<PlSmallStr>, IdxSize)>,
            pre_slice: Option<(i64, usize)>,
            cast_options: Wrap<CastColumnsPolicy>,
            extra_columns: Wrap<ExtraColumnsPolicy>,
            missing_columns: Wrap<MissingColumnsPolicy>,
            include_file_paths: Option<Wrap<PlSmallStr>>,
            glob: bool,
            hidden_file_prefix: Option<Vec<PyBackedStr>>,
            column_mapping: Option<Wrap<ColumnMapping>>,
            default_values: Option<Wrap<DefaultFieldValues>>,
            hive_partitioning: Option<bool>,
            hive_schema: Option<Wrap<Schema>>,
            try_parse_hive_dates: bool,
            rechunk: bool,
            cache: bool,
            storage_options: OptPyCloudOptions<'a>,
            credential_provider: Option<Py<PyAny>>,
            deletion_files: Option<Wrap<DeletionFilesList>>,
            table_statistics: Option<Wrap<TableStatistics>>,
            row_count: Option<(u64, u64)>,
        }

        let Extract {
            row_index,
            pre_slice,
            cast_options,
            extra_columns,
            missing_columns,
            include_file_paths,
            column_mapping,
            default_values,
            glob,
            hidden_file_prefix,
            hive_partitioning,
            hive_schema,
            try_parse_hive_dates,
            rechunk,
            cache,
            storage_options,
            credential_provider,
            deletion_files,
            table_statistics,
            row_count,
        } = self.0.extract()?;

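        // Resolve cloud configuration: combine the Python-side storage options
        // with the optional credential provider for the given scheme.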
        let cloud_options =
            storage_options.extract_opt_cloud_options(cloud_scheme, credential_provider)?;

        let hive_schema = hive_schema.map(|s| Arc::new(s.0));

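        // A row index arrives from Python as a `(name, offset)` tuple; convert
        // it into the engine's `RowIndex`.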
        let row_index = row_index.map(|(name, offset)| RowIndex {
            name: name.0,
            offset,
        });

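        // Hive partition options; `hive_start_idx: 0` means no path prefix is
        // skipped at this stage (later path resolution may adjust it).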
        let hive_options = HiveOptions {
            enabled: hive_partitioning,
            hive_start_idx: 0,
            schema: hive_schema,
            try_parse_dates: try_parse_hive_dates,
        };

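        // Assemble the unified arguments. `schema` and `projection` are not
        // carried by the Python options object and remain `None` here.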
        let unified_scan_args = UnifiedScanArgs {
            schema: None,
            cloud_options,
            hive_options,
            rechunk,
            cache,
            glob,
            hidden_file_prefix: hidden_file_prefix
                .map(|x| x.into_iter().map(|x| (*x).into()).collect()),
            projection: None,
            column_mapping: column_mapping.map(|x| x.0),
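            // `DefaultFieldValues` currently has a single `Iceberg` variant,
            // so this closure pattern is irrefutable; empty value maps are
            // normalized to `None`.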
            default_values: default_values
                .map(|x| x.0)
                .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
            row_index,
            pre_slice: pre_slice.map(Slice::from),
            cast_columns_policy: cast_options.0,
            missing_columns_policy: missing_columns.0,
            extra_columns_policy: extra_columns.0,
            include_file_paths: include_file_paths.map(|x| x.0),
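            // Normalize empty deletion-file lists to `None` so downstream
            // checks can rely on `Some` meaning a non-empty list.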
            deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
            table_statistics: table_statistics.map(|x| x.0),
            row_count,
        };

        Ok(unified_scan_args)
    }
}