supertable-core 0.1.0

Core library for SuperTable, a next-generation open table format
Documentation
//! # SuperTable DataFusion Integration
//!
//! This module provides the `TableProvider` implementation for DataFusion,
//! enabling SuperTable to be used as a source for SQL queries and DataFrame operations.

use crate::table::Table;
use arrow::datatypes::SchemaRef;
use async_trait::async_trait;
use datafusion::catalog::Session;
use datafusion::datasource::TableProvider;
use datafusion::logical_expr::{Expr, TableType};
use datafusion::physical_plan::ExecutionPlan;
use std::any::Any;
use std::sync::Arc;

/// A DataFusion TableProvider for SuperTable.
#[derive(Debug)]
pub struct SuperTableProvider {
    table: Table,
}

impl SuperTableProvider {
    pub fn new(table: Table) -> Self {
        Self { table }
    }
}

#[async_trait]
impl TableProvider for SuperTableProvider {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema(&self) -> SchemaRef {
        self.table.metadata.current_schema().to_arrow_schema_ref()
    }

    fn table_type(&self) -> TableType {
        TableType::Base
    }

    async fn scan(
        &self,
        _state: &dyn Session,
        _projection: Option<&Vec<usize>>,
        _filters: &[Expr],
        _limit: Option<usize>,
    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
        let snapshot = self.table.metadata.current_snapshot().ok_or_else(|| {
            datafusion::error::DataFusionError::Plan("No current snapshot found".to_string())
        })?;

        // This is a placeholder. In a production implementation, we would:
        // 1. Get the list of data files from the snapshot.
        // 2. Apply partition pruning based on `_filters`.
        // 3. Apply file pruning based on `_statistics`.
        // 4. Create a ParquetExec with the filtered files.
        let _files = snapshot
            .all_data_files(&self.table.storage)
            .await
            .map_err(|e| datafusion::error::DataFusionError::External(e.into()))?;

        Err(datafusion::error::DataFusionError::NotImplemented(
            "Physical scan execution requires further integration with DataFusion's ParquetExec"
                .to_string(),
        ))
    }
}