datafusion_federation/sql/
executor.rs

1use async_trait::async_trait;
2use core::fmt;
3use datafusion::{
4    arrow::datatypes::SchemaRef,
5    common::Statistics,
6    error::Result,
7    logical_expr::LogicalPlan,
8    physical_plan::{metrics::MetricsSet, SendableRecordBatchStream},
9    sql::{sqlparser::ast, unparser::dialect::Dialect},
10};
11use std::sync::Arc;
12
13pub type SQLExecutorRef = Arc<dyn SQLExecutor>;
14pub type AstAnalyzer = Box<dyn FnMut(ast::Statement) -> Result<ast::Statement>>;
15pub type LogicalOptimizer = Box<dyn FnMut(LogicalPlan) -> Result<LogicalPlan>>;
16
17#[async_trait]
18pub trait SQLExecutor: Sync + Send {
19    /// Executor name
20    fn name(&self) -> &str;
21
22    /// Executor compute context allows differentiating the remote compute context
23    /// such as authorization or active database.
24    ///
25    /// Note: returning None here may cause incorrect federation with other providers of the
26    /// same name that also have a compute_context of None.
27    /// Instead try to return a unique string that will never match any other
28    /// provider's context.
29    fn compute_context(&self) -> Option<String>;
30
31    /// The specific SQL dialect (currently supports 'sqlite', 'postgres', 'flight')
32    fn dialect(&self) -> Arc<dyn Dialect>;
33
34    /// Returns the analyzer rule specific for this engine to modify the logical plan before execution
35    fn logical_optimizer(&self) -> Option<LogicalOptimizer> {
36        None
37    }
38
39    /// Returns an AST analyzer specific for this engine to modify the AST before execution
40    fn ast_analyzer(&self) -> Option<AstAnalyzer> {
41        None
42    }
43
44    /// Execute a SQL query
45    fn execute(&self, query: &str, schema: SchemaRef) -> Result<SendableRecordBatchStream>;
46
47    /// Returns statistics for this `SQLExecutor` node. If statistics are not available, it should
48    /// return [`Statistics::new_unknown`] (the default), not an error. See the `ExecutionPlan`
49    /// trait.
50    async fn statistics(&self, plan: &LogicalPlan) -> Result<Statistics> {
51        Ok(Statistics::new_unknown(plan.schema().as_arrow()))
52    }
53
54    /// Returns the tables provided by the remote
55    async fn table_names(&self) -> Result<Vec<String>>;
56
57    /// Returns the schema of table_name within this [`SQLExecutor`]
58    async fn get_table_schema(&self, table_name: &str) -> Result<SchemaRef>;
59
60    /// Returns the execution metrics, if available.
61    fn metrics(&self) -> Option<MetricsSet> {
62        None
63    }
64}
65
66impl fmt::Debug for dyn SQLExecutor {
67    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
68        write!(f, "{} {:?}", self.name(), self.compute_context())
69    }
70}
71
72impl fmt::Display for dyn SQLExecutor {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        write!(f, "{} {:?}", self.name(), self.compute_context())
75    }
76}