use std::fmt::Debug;
use std::sync::OnceLock;
use polars_core::error::PolarsResult;
use polars_core::schema::SchemaRef;
use polars_utils::pl_str::PlSmallStr;
use polars_utils::python_function::PythonObject;
use crate::dsl::DslPlan;
/// Process-wide slot holding the [`PythonDatasetProviderVTable`].
///
/// The slot starts out empty and, being a `OnceLock`, can be filled at most once.
/// Read it through [`dataset_provider_vtable`], which returns an error while the
/// slot is still unset.
// NOTE(review): presumably filled in by the Python bindings during start-up — confirm.
pub static DATASET_PROVIDER_VTABLE: OnceLock<PythonDatasetProviderVTable> = OnceLock::new();
/// Table of function pointers through which this crate calls into a dataset
/// object held as a [`PythonObject`].
///
/// Every entry receives the dataset object as its first argument.
pub struct PythonDatasetProviderVTable {
/// Returns a name for the given dataset object.
pub name: fn(dataset_object: &PythonObject) -> PlSmallStr,
/// Returns the schema of the given dataset object, or an error.
pub schema: fn(dataset_object: &PythonObject) -> PolarsResult<SchemaRef>,
/// Asks the dataset object for a scan plan.
///
/// On success the result is either `None` or a `(DslPlan, PlSmallStr)` pair.
// NOTE(review): the string in the pair is presumably the resolved version key
// (mirroring `existing_resolved_version_key`), and `None` presumably means the
// existing key is still valid — confirm against the implementation.
// NOTE(review): `limit`, `projection`, `filter_columns` and `pyarrow_predicate`
// look like pushdown hints; their exact semantics are not visible here.
#[expect(clippy::type_complexity)]
pub to_dataset_scan: fn(
dataset_object: &PythonObject,
existing_resolved_version_key: Option<&str>,
limit: Option<usize>,
projection: Option<&[PlSmallStr]>,
filter_columns: Option<&[PlSmallStr]>,
pyarrow_predicate: Option<&str>,
) -> PolarsResult<Option<(DslPlan, PlSmallStr)>>,
}
/// Looks up the globally registered dataset-provider vtable.
///
/// # Errors
///
/// Returns a static message when `DATASET_PROVIDER_VTABLE` has not been set yet.
pub fn dataset_provider_vtable() -> Result<&'static PythonDatasetProviderVTable, &'static str> {
    match DATASET_PROVIDER_VTABLE.get() {
        Some(vtable) => Ok(vtable),
        None => Err("DATASET_PROVIDER_VTABLE not initialized"),
    }
}
/// Wrapper around a dataset object held as a [`PythonObject`].
///
/// Its methods dispatch through the vtable returned by [`dataset_provider_vtable`].
/// `Serialize`/`Deserialize` are derived when the `serde` feature is enabled, and
/// `JsonSchema` when the `dsl-schema` feature is enabled.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct PythonDatasetProvider {
// The wrapped dataset object; passed as the first argument to every vtable call.
dataset_object: PythonObject,
}
impl PythonDatasetProvider {
    /// Creates a provider that wraps `dataset_object`.
    pub fn new(dataset_object: PythonObject) -> Self {
        Self { dataset_object }
    }

    /// Returns the dataset's name as reported by the vtable's `name` entry.
    ///
    /// # Panics
    ///
    /// Panics if the dataset-provider vtable has not been initialized.
    pub fn name(&self) -> PlSmallStr {
        let vtable = dataset_provider_vtable().unwrap();
        (vtable.name)(&self.dataset_object)
    }

    /// Returns the dataset's schema as reported by the vtable's `schema` entry.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the vtable's `schema` entry returns.
    ///
    /// # Panics
    ///
    /// Panics if the dataset-provider vtable has not been initialized.
    pub fn schema(&self) -> PolarsResult<SchemaRef> {
        let vtable = dataset_provider_vtable().unwrap();
        (vtable.schema)(&self.dataset_object)
    }

    /// Forwards every argument unchanged to the vtable's `to_dataset_scan` entry,
    /// together with the wrapped dataset object, and returns its result as-is.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the vtable's `to_dataset_scan` entry returns.
    ///
    /// # Panics
    ///
    /// Panics if the dataset-provider vtable has not been initialized.
    pub fn to_dataset_scan(
        &self,
        existing_resolved_version_key: Option<&str>,
        limit: Option<usize>,
        projection: Option<&[PlSmallStr]>,
        filter_columns: Option<&[PlSmallStr]>,
        pyarrow_predicate: Option<&str>,
    ) -> PolarsResult<Option<(DslPlan, PlSmallStr)>> {
        // Resolve the vtable before touching any argument, as the direct call did.
        let vtable = dataset_provider_vtable().unwrap();
        let scan_fn = vtable.to_dataset_scan;
        scan_fn(
            &self.dataset_object,
            existing_resolved_version_key,
            limit,
            projection,
            filter_columns,
            pyarrow_predicate,
        )
    }
}