// polars_plan/dsl/file_scan/python_dataset.rs
use std::fmt::Debug;
use std::sync::OnceLock;

use polars_core::error::PolarsResult;
use polars_core::schema::SchemaRef;
use polars_utils::pl_str::PlSmallStr;
use polars_utils::python_function::PythonObject;

use crate::dsl::DslPlan;
10
11pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();
14
15pub struct PythonDatasetProviderVTable {
16 pub reader_name: fn(dataset_object: &PythonObject) -> PlSmallStr,
17
18 pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,
19
20 #[expect(clippy::type_complexity)]
21 pub to_dataset_scan: fn(
22 dataset_object: &PythonObject,
23 limit: Option<usize>,
24 projection: Option<&[PlSmallStr]>,
25 ) -> PolarsResult<DslPlan>,
26}
27
28pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {
29 DATASET_PROVIDER_VTABLE
30 .get()
31 .ok_or("DATASET_PROVIDER_VTABLE not initialized")
32}
33
34#[derive(Debug)]
36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
37pub struct PythonDatasetProvider {
38 dataset_object: PythonObject,
39}
40
41impl PythonDatasetProvider {
42 pub fn new(dataset_object: PythonObject) -> Self {
43 Self { dataset_object }
44 }
45
46 pub fn reader_name(&self) -> PlSmallStr {
47 (dataset_provider_vtable().unwrap().reader_name)(&self.dataset_object)
48 }
49
50 pub fn schema(&self) -> PolarsResult<SchemaRef> {
51 (dataset_provider_vtable().unwrap().schema)(&self.dataset_object)
52 }
53
54 pub fn to_dataset_scan(
55 &self,
56 limit: Option<usize>,
57 projection: Option<&[PlSmallStr]>,
58 ) -> PolarsResult<DslPlan> {
59 (dataset_provider_vtable().unwrap().to_dataset_scan)(
60 &self.dataset_object,
61 limit,
62 projection,
63 )
64 }
65}