polars_plan/dsl/file_scan/
python_dataset.rs

1use std::fmt::Debug;
2use std::sync::OnceLock;
3
4use polars_core::error::PolarsResult;
5use polars_core::schema::SchemaRef;
6use polars_utils::pl_str::PlSmallStr;
7use polars_utils::python_function::PythonObject;
8
9use crate::dsl::DslPlan;
10
11/// This is for `polars-python` to inject so that the implementation can be done there:
12/// * The impls for converting from Python objects are there.
13pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();
14
15pub struct PythonDatasetProviderVTable {
16    pub reader_name: fn(dataset_object: &PythonObject) -> PlSmallStr,
17
18    pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,
19
20    #[expect(clippy::type_complexity)]
21    pub to_dataset_scan: fn(
22        dataset_object: &PythonObject,
23        limit: Option<usize>,
24        projection: Option<&[PlSmallStr]>,
25    ) -> PolarsResult<DslPlan>,
26}
27
28pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {
29    DATASET_PROVIDER_VTABLE
30        .get()
31        .ok_or("DATASET_PROVIDER_VTABLE not initialized")
32}
33
34/// Currently intended only for Iceberg support
35#[derive(Debug)]
36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
37pub struct PythonDatasetProvider {
38    dataset_object: PythonObject,
39}
40
41impl PythonDatasetProvider {
42    pub fn new(dataset_object: PythonObject) -> Self {
43        Self { dataset_object }
44    }
45
46    pub fn reader_name(&self) -> PlSmallStr {
47        (dataset_provider_vtable().unwrap().reader_name)(&self.dataset_object)
48    }
49
50    pub fn schema(&self) -> PolarsResult<SchemaRef> {
51        (dataset_provider_vtable().unwrap().schema)(&self.dataset_object)
52    }
53
54    pub fn to_dataset_scan(
55        &self,
56        limit: Option<usize>,
57        projection: Option<&[PlSmallStr]>,
58    ) -> PolarsResult<DslPlan> {
59        (dataset_provider_vtable().unwrap().to_dataset_scan)(
60            &self.dataset_object,
61            limit,
62            projection,
63        )
64    }
65}