polars_python/functions/
io.rs1use std::io::BufReader;
2
3#[cfg(any(feature = "ipc", feature = "parquet"))]
4use polars::prelude::ArrowSchema;
5use polars::prelude::CloudScheme;
6use pyo3::prelude::*;
7use pyo3::types::PyDict;
8
9use crate::conversion::Wrap;
10use crate::error::PyPolarsErr;
11use crate::file::{EitherRustPythonFile, get_either_file};
12use crate::io::cloud_options::OptPyCloudOptions;
13
/// Read the schema of an Arrow IPC file and return it as a Python dict.
///
/// `py_f` is resolved through `get_either_file` into either a native Rust
/// file (wrapped in a `BufReader` here) or a Python file-like object, and the
/// IPC file metadata is read from whichever one was produced. The returned
/// dict is filled by `fields_to_pydict` from the metadata's schema.
///
/// # Errors
/// Propagates failures from resolving `py_f`, from reading the IPC file
/// metadata (converted through `PyPolarsErr`), and from populating the dict.
#[cfg(feature = "ipc")]
#[pyfunction]
pub fn read_ipc_schema(py: Python<'_>, py_f: Py<PyAny>) -> PyResult<Bound<'_, PyDict>> {
    use arrow::io::ipc::read::read_file_metadata;

    // Both arms call the same reader function, so the error conversion is
    // applied once to the result of the whole `match`.
    let file_metadata = match get_either_file(py_f, false)? {
        EitherRustPythonFile::Rust(file) => read_file_metadata(&mut BufReader::new(file)),
        EitherRustPythonFile::Py(mut py_file) => read_file_metadata(&mut py_file),
    }
    .map_err(PyPolarsErr::from)?;

    let schema_dict = PyDict::new(py);
    fields_to_pydict(&file_metadata.schema, &schema_dict)?;
    Ok(schema_dict)
}
29
/// Read the custom key-value metadata of a Parquet source into a Python dict.
///
/// `py_f` is resolved through `get_python_scan_source_input` into one of:
/// - an in-memory buffer, read through a `Cursor`;
/// - a path, which is read through `ParquetObjectStore` when it has a scheme
///   (gated behind the `cloud` feature) and opened as a local file otherwise;
/// - an already opened file, read through a `BufReader`.
///
/// `storage_options` and `credential_provider` are only consumed in the path
/// case, where they are converted into cloud options.
///
/// Returns a dict whose string keys and values are the entries produced by
/// `read_custom_key_value_metadata` for the file's key-value metadata.
///
/// # Errors
/// Propagates failures from resolving `py_f`, extracting the cloud options,
/// opening or reading the source (converted through `PyPolarsErr`), and
/// inserting entries into the dict.
#[cfg(feature = "parquet")]
#[pyfunction]
pub fn read_parquet_metadata(
    py: Python,
    py_f: Py<PyAny>,
    storage_options: OptPyCloudOptions,
    credential_provider: Option<Py<PyAny>>,
) -> PyResult<Py<PyDict>> {
    use std::io::Cursor;

    use polars_error::feature_gated;
    use polars_parquet::read::read_metadata;
    use polars_parquet::read::schema::read_custom_key_value_metadata;

    use crate::file::{PythonScanSourceInput, get_python_scan_source_input};

    let file_metadata = match get_python_scan_source_input(py_f, false)? {
        PythonScanSourceInput::Buffer(bytes) => {
            read_metadata(&mut Cursor::new(bytes)).map_err(PyPolarsErr::from)?
        },
        PythonScanSourceInput::Path(path) => {
            // Cloud options are extracted before branching on the scheme,
            // so extraction errors surface for local paths as well.
            let scheme = CloudScheme::from_path(path.as_str());
            let cloud_options =
                storage_options.extract_opt_cloud_options(scheme, credential_provider)?;

            if !path.has_scheme() {
                // No scheme: treat the path as a local file.
                let local_file =
                    polars_utils::open_file(path.as_std_path()).map_err(PyPolarsErr::from)?;
                read_metadata(&mut BufReader::new(local_file)).map_err(PyPolarsErr::from)?
            } else {
                feature_gated!("cloud", {
                    use polars::prelude::ParquetObjectStore;
                    use polars_error::PolarsResult;

                    // The async fetch is driven to completion inside
                    // `py.detach`, i.e. while detached from the interpreter.
                    py.detach(|| {
                        polars_core::runtime::ASYNC.block_on(async {
                            let mut store =
                                ParquetObjectStore::from_uri(path, cloud_options.as_ref(), None)
                                    .await?;
                            let shared_metadata = store.get_metadata().await?;
                            // Double deref to reach the metadata value, then
                            // clone it into an owned copy.
                            PolarsResult::Ok((**shared_metadata).clone())
                        })
                    })
                })
                .map_err(PyPolarsErr::from)?
            }
        },
        PythonScanSourceInput::File(file) => {
            read_metadata(&mut BufReader::new(file)).map_err(PyPolarsErr::from)?
        },
    };

    let custom_entries = read_custom_key_value_metadata(file_metadata.key_value_metadata());
    let py_dict = PyDict::new(py);
    for (name, content) in custom_entries {
        py_dict.set_item(name.as_str(), content.as_str())?;
    }
    Ok(py_dict.unbind())
}
89
/// Return the byte length of the serialized Parquet metadata of the file at
/// `path`.
///
/// The metadata is serialized with `pl_serialize::serialize_to_bytes`.
/// - With `projection == None` the full metadata is serialized and
///   `predicate` is not used.
/// - With `projection == Some(..)` the metadata is first reduced via
///   `pruned(projection, predicate)` and the pruned value is serialized.
///
/// # Errors
/// Fails if the file cannot be opened, the Parquet metadata cannot be read,
/// pruning fails, or serialization fails.
#[cfg(all(feature = "parquet", feature = "json"))]
#[pyfunction]
pub fn _bench_parquet_metadata_bincode_size(
    path: &str,
    projection: Option<Vec<String>>,
    predicate: Vec<String>,
) -> PyResult<usize> {
    use polars_parquet::read::read_metadata;
    use polars_utils::pl_serialize;
    use polars_utils::pl_str::PlSmallStr;

    let handle = std::fs::File::open(path).map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    let file_metadata = read_metadata(&mut BufReader::new(handle)).map_err(PyPolarsErr::from)?;

    let serialized = if let Some(projected) = projection {
        let keep_cols: Vec<PlSmallStr> = projected.into_iter().map(PlSmallStr::from).collect();
        let predicate_cols: Vec<PlSmallStr> =
            predicate.into_iter().map(PlSmallStr::from).collect();
        let reduced = file_metadata
            .pruned(&keep_cols, &predicate_cols)
            .map_err(|e| PyPolarsErr::Other(e.to_string()))?;
        pl_serialize::serialize_to_bytes::<_, false>(&reduced).map_err(PyPolarsErr::from)?
    } else {
        // No projection requested: measure the unpruned metadata.
        pl_serialize::serialize_to_bytes::<_, false>(&file_metadata).map_err(PyPolarsErr::from)?
    };

    Ok(serialized.len())
}
129
/// Return the pruned Parquet metadata of the file at `path` as a JSON string.
///
/// The file's metadata is read, reduced via `pruned(projection, predicate)`
/// and then serialized with `serde_json::to_string`.
///
/// # Errors
/// Fails if the file cannot be opened, the Parquet metadata cannot be read,
/// pruning fails, or JSON serialization fails. Apart from the metadata read
/// error (converted via `PyPolarsErr::from`), these errors are reported as
/// `PyPolarsErr::Other` carrying the underlying error's message.
#[cfg(all(feature = "parquet", feature = "json"))]
#[pyfunction]
pub fn _parquet_metadata_pruned_json(
    path: &str,
    projection: Vec<String>,
    predicate: Vec<String>,
) -> PyResult<String> {
    use polars_parquet::read::read_metadata;
    use polars_utils::pl_str::PlSmallStr;

    let handle = std::fs::File::open(path).map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    let mut reader = BufReader::new(handle);
    let file_metadata = read_metadata(&mut reader).map_err(PyPolarsErr::from)?;

    let keep_cols: Vec<PlSmallStr> = projection.into_iter().map(PlSmallStr::from).collect();
    let predicate_cols: Vec<PlSmallStr> = predicate.into_iter().map(PlSmallStr::from).collect();
    let reduced = file_metadata
        .pruned(&keep_cols, &predicate_cols)
        .map_err(|e| PyPolarsErr::Other(e.to_string()))?;

    let json = serde_json::to_string(&reduced).map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    Ok(json)
}
157
/// Insert one entry per field of `schema` into `dict`.
///
/// Each key is the field's name; each value is the polars `DataType` derived
/// from the Arrow field via `DataType::from_arrow_field`, wrapped in `Wrap`
/// for conversion to Python.
///
/// # Errors
/// Stops at, and returns, the first error raised while inserting into `dict`.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn fields_to_pydict(schema: &ArrowSchema, dict: &Bound<'_, PyDict>) -> PyResult<()> {
    schema.iter_values().try_for_each(|arrow_field| {
        let dtype = Wrap(polars::prelude::DataType::from_arrow_field(arrow_field));
        dict.set_item(arrow_field.name.as_str(), &dtype)
    })
}
166
/// Return the current text content of the system clipboard.
///
/// # Errors
/// Returns `PyPolarsErr::Other` carrying the `arboard` error message if the
/// clipboard cannot be opened or its text cannot be retrieved.
#[cfg(feature = "clipboard")]
#[pyfunction]
pub fn read_clipboard_string() -> PyResult<String> {
    // `arboard` is referenced by its crate path directly; no `use` is needed.
    let mut clipboard =
        arboard::Clipboard::new().map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    let text = clipboard
        .get_text()
        .map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    Ok(text)
}
178
/// Replace the system clipboard's content with the text `s`.
///
/// # Errors
/// Returns `PyPolarsErr::Other` carrying the `arboard` error message if the
/// clipboard cannot be opened or the text cannot be set.
#[cfg(feature = "clipboard")]
#[pyfunction]
pub fn write_clipboard_string(s: &str) -> PyResult<()> {
    // `arboard` is referenced by its crate path directly; no `use` is needed.
    let mut clipboard =
        arboard::Clipboard::new().map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    clipboard
        .set_text(s)
        .map_err(|e| PyPolarsErr::Other(e.to_string()))?;
    Ok(())
}