//! `datafusion_federation/sql/executor.rs`

1use async_trait::async_trait;
2use core::fmt;
3use datafusion::{
4    arrow::datatypes::SchemaRef,
5    common::Statistics,
6    error::Result,
7    logical_expr::LogicalPlan,
8    physical_plan::{metrics::MetricsSet, PhysicalExpr, SendableRecordBatchStream},
9    sql::{sqlparser::ast, unparser::dialect::Dialect},
10};
11use std::sync::Arc;
12
13pub type SQLExecutorRef = Arc<dyn SQLExecutor>;
14pub type AstAnalyzer = Box<dyn FnMut(ast::Statement) -> Result<ast::Statement>>;
15pub type LogicalOptimizer = Box<dyn FnMut(LogicalPlan) -> Result<LogicalPlan>>;
16pub type SqlQueryRewriter = Box<dyn FnMut(String) -> Result<String>>;
17
18#[async_trait]
19pub trait SQLExecutor: Sync + Send {
20    /// Executor name
21    fn name(&self) -> &str;
22
23    /// Executor compute context allows differentiating the remote compute context
24    /// such as authorization or active database.
25    ///
26    /// Note: returning None here may cause incorrect federation with other providers of the
27    /// same name that also have a compute_context of None.
28    /// Instead try to return a unique string that will never match any other
29    /// provider's context.
30    fn compute_context(&self) -> Option<String>;
31
32    /// The specific SQL dialect (currently supports 'sqlite', 'postgres', 'flight')
33    fn dialect(&self) -> Arc<dyn Dialect>;
34
35    /// Returns the analyzer rule specific for this engine to modify the logical plan before execution
36    fn logical_optimizer(&self) -> Option<LogicalOptimizer> {
37        None
38    }
39
40    /// Returns an AST analyzer specific for this engine to modify the AST before execution
41    fn ast_analyzer(&self) -> Option<AstAnalyzer> {
42        None
43    }
44
45    /// Execute a SQL query.
46    ///
47    /// `filters` contain physical expressions generated at runtime, like
48    /// `DynamicFilterPhysicalExpr`. Since the concrete expression values only become available when
49    /// the `SendableRecordBatchStream` is executed, they must be manually added to the SQL query,
50    /// if necessary. However, they can be safely ignored.
51    fn execute(
52        &self,
53        query: &str,
54        schema: SchemaRef,
55        filters: &[Arc<dyn PhysicalExpr>],
56    ) -> Result<SendableRecordBatchStream>;
57
58    /// Returns statistics for this `SQLExecutor` node. If statistics are not available, it should
59    /// return [`Statistics::new_unknown`] (the default), not an error. See the `ExecutionPlan`
60    /// trait.
61    async fn statistics(&self, plan: &LogicalPlan) -> Result<Statistics> {
62        Ok(Statistics::new_unknown(plan.schema().as_arrow()))
63    }
64
65    /// Returns the tables provided by the remote
66    async fn table_names(&self) -> Result<Vec<String>>;
67
68    /// Returns the schema of table_name within this [`SQLExecutor`]
69    async fn get_table_schema(&self, table_name: &str) -> Result<SchemaRef>;
70
71    /// Returns the execution metrics, if available.
72    fn metrics(&self) -> Option<MetricsSet> {
73        None
74    }
75}
76
77impl fmt::Debug for dyn SQLExecutor {
78    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
79        write!(f, "{} {:?}", self.name(), self.compute_context())
80    }
81}
82
83impl fmt::Display for dyn SQLExecutor {
84    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85        write!(f, "{} {:?}", self.name(), self.compute_context())
86    }
87}