Skip to main content

datafusion_expr/
planner.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning
19
20use std::fmt::Debug;
21use std::sync::Arc;
22
23use crate::expr::NullTreatment;
24#[cfg(feature = "sql")]
25use crate::logical_plan::LogicalPlan;
26use crate::{
27    AggregateUDF, Expr, GetFieldAccess, HigherOrderUDF, ScalarUDF, SortExpr, TableSource,
28    WindowFrame, WindowFunctionDefinition, WindowUDF,
29};
30use arrow::datatypes::{DataType, Field, FieldRef, SchemaRef};
31use datafusion_common::datatype::DataTypeExt;
32use datafusion_common::{
33    DFSchema, Result, TableReference, config::ConfigOptions,
34    file_options::file_type::FileType, not_impl_err,
35};
36#[cfg(feature = "sql")]
37use sqlparser::ast::{Expr as SQLExpr, Ident, ObjectName, TableAlias, TableFactor};
38
39/// Provides the `SQL` query planner meta-data about tables and
40/// functions referenced in SQL statements, without a direct dependency on the
41/// `datafusion` Catalog structures such as [`TableProvider`]
42///
43/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/catalog/trait.TableProvider.html
44pub trait ContextProvider {
45    /// Returns a table by reference, if it exists
46    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>;
47
48    /// Return the type of a file based on its extension (e.g. `.parquet`)
49    ///
50    /// This is used to plan `COPY` statements
51    fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> {
52        not_impl_err!("Registered file types are not supported")
53    }
54
55    /// Getter for a table function
56    fn get_table_function_source(
57        &self,
58        _name: &str,
59        _args: Vec<Expr>,
60    ) -> Result<Arc<dyn TableSource>> {
61        not_impl_err!("Table Functions are not supported")
62    }
63
64    /// Provides an intermediate table that is used to store the results of a CTE during execution
65    ///
66    /// CTE stands for "Common Table Expression"
67    ///
68    /// # Notes
69    /// We don't directly implement this in [`SqlToRel`] as implementing this function
70    /// often requires access to a table that contains
71    /// execution-related types that can't be a direct dependency
72    /// of the sql crate (for example [`CteWorkTable`]).
73    ///
74    /// The [`ContextProvider`] provides a way to "hide" this dependency.
75    ///
76    /// [`SqlToRel`]: https://docs.rs/datafusion/latest/datafusion/sql/planner/struct.SqlToRel.html
77    /// [`CteWorkTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/cte_worktable/struct.CteWorkTable.html
78    fn create_cte_work_table(
79        &self,
80        _name: &str,
81        _schema: SchemaRef,
82    ) -> Result<Arc<dyn TableSource>> {
83        not_impl_err!("Recursive CTE is not implemented")
84    }
85
86    /// Return [`ExprPlanner`] extensions for planning expressions
87    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
88        &[]
89    }
90
91    /// Return [`RelationPlanner`] extensions for planning table factors
92    #[cfg(feature = "sql")]
93    fn get_relation_planners(&self) -> &[Arc<dyn RelationPlanner>] {
94        &[]
95    }
96
97    /// Return [`TypePlanner`] extensions for planning data types
98    #[cfg(feature = "sql")]
99    fn get_type_planner(&self) -> Option<Arc<dyn TypePlanner>> {
100        None
101    }
102
103    /// Return the scalar function with a given name, if any
104    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
105
106    /// Return the higher order function with a given name, if any
107    fn get_higher_order_meta(&self, name: &str) -> Option<Arc<HigherOrderUDF>>;
108
109    /// Return the aggregate function with a given name, if any
110    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
111
112    /// Return the window function with a given name, if any
113    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>;
114
115    /// Return the system/user-defined variable type, if any
116    ///
117    /// A user defined variable is typically accessed via `@var_name`
118    fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
119
120    /// Return metadata about a system/user-defined variable, if any.
121    ///
122    /// By default, this wraps [`Self::get_variable_type`] in an Arrow [`Field`]
123    /// with nullable set to `true` and no metadata. Implementations that can
124    /// provide richer information (such as nullability or extension metadata)
125    /// should override this method.
126    fn get_variable_field(&self, variable_names: &[String]) -> Option<FieldRef> {
127        self.get_variable_type(variable_names)
128            .map(|data_type| data_type.into_nullable_field_ref())
129    }
130
131    /// Return overall configuration options
132    fn options(&self) -> &ConfigOptions;
133
134    /// Return all scalar function names
135    fn udf_names(&self) -> Vec<String>;
136
137    /// Return all higher order function names
138    fn higher_order_function_names(&self) -> Vec<String>;
139
140    /// Return all aggregate function names
141    fn udaf_names(&self) -> Vec<String>;
142
143    /// Return all window function names
144    fn udwf_names(&self) -> Vec<String>;
145}
146
147/// Customize planning of SQL AST expressions to [`Expr`]s
148///
149/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
150///
151/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
152pub trait ExprPlanner: Debug + Send + Sync {
153    /// Plan the binary operation between two expressions, returns original
154    /// BinaryExpr if not possible
155    fn plan_binary_op(
156        &self,
157        expr: RawBinaryExpr,
158        _schema: &DFSchema,
159    ) -> Result<PlannerResult<RawBinaryExpr>> {
160        Ok(PlannerResult::Original(expr))
161    }
162
163    /// Plan the field access expression, such as `foo.bar`
164    ///
165    /// returns original [`RawFieldAccessExpr`] if not possible
166    fn plan_field_access(
167        &self,
168        expr: RawFieldAccessExpr,
169        _schema: &DFSchema,
170    ) -> Result<PlannerResult<RawFieldAccessExpr>> {
171        Ok(PlannerResult::Original(expr))
172    }
173
174    /// Plan an array literal, such as `[1, 2, 3]`
175    ///
176    /// Returns original expression arguments if not possible
177    fn plan_array_literal(
178        &self,
179        exprs: Vec<Expr>,
180        _schema: &DFSchema,
181    ) -> Result<PlannerResult<Vec<Expr>>> {
182        Ok(PlannerResult::Original(exprs))
183    }
184
185    /// Plan a `POSITION` expression, such as `POSITION(<expr> in <expr>)`
186    ///
187    /// Returns original expression arguments if not possible
188    fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
189        Ok(PlannerResult::Original(args))
190    }
191
192    /// Plan a dictionary literal, such as `{ key: value, ...}`
193    ///
194    /// Returns original expression arguments if not possible
195    fn plan_dictionary_literal(
196        &self,
197        expr: RawDictionaryExpr,
198        _schema: &DFSchema,
199    ) -> Result<PlannerResult<RawDictionaryExpr>> {
200        Ok(PlannerResult::Original(expr))
201    }
202
203    /// Plan an extract expression, such as`EXTRACT(month FROM foo)`
204    ///
205    /// Returns original expression arguments if not possible
206    fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
207        Ok(PlannerResult::Original(args))
208    }
209
210    /// Plan an substring expression, such as `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])`
211    ///
212    /// Returns original expression arguments if not possible
213    fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
214        Ok(PlannerResult::Original(args))
215    }
216
217    /// Plans a struct literal, such as  `{'field1' : expr1, 'field2' : expr2, ...}`
218    ///
219    /// This function takes a vector of expressions and a boolean flag
220    /// indicating whether the struct uses the optional name
221    ///
222    /// Returns the original input expressions if planning is not possible.
223    fn plan_struct_literal(
224        &self,
225        args: Vec<Expr>,
226        _is_named_struct: bool,
227    ) -> Result<PlannerResult<Vec<Expr>>> {
228        Ok(PlannerResult::Original(args))
229    }
230
231    /// Plans an overlay expression, such as `overlay(str PLACING substr FROM pos [FOR count])`
232    ///
233    /// Returns original expression arguments if not possible
234    fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
235        Ok(PlannerResult::Original(args))
236    }
237
238    /// Plans a `make_map` expression, such as `make_map(key1, value1, key2, value2, ...)`
239    ///
240    /// Returns original expression arguments if not possible
241    fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
242        Ok(PlannerResult::Original(args))
243    }
244
245    /// Plans compound identifier such as `db.schema.table` for non-empty nested names
246    ///
247    /// # Note:
248    /// Currently compound identifier for outer query schema is not supported.
249    ///
250    /// Returns original expression if not possible
251    fn plan_compound_identifier(
252        &self,
253        _field: &Field,
254        _qualifier: Option<&TableReference>,
255        _nested_names: &[String],
256    ) -> Result<PlannerResult<Vec<Expr>>> {
257        not_impl_err!(
258            "Default planner compound identifier hasn't been implemented for ExprPlanner"
259        )
260    }
261
262    /// Plans aggregate functions, such as `COUNT(<expr>)`
263    ///
264    /// Returns original expression arguments if not possible
265    fn plan_aggregate(
266        &self,
267        expr: RawAggregateExpr,
268    ) -> Result<PlannerResult<RawAggregateExpr>> {
269        Ok(PlannerResult::Original(expr))
270    }
271
272    /// Plans window functions, such as `COUNT(<expr>)`
273    ///
274    /// Returns original expression arguments if not possible
275    fn plan_window(&self, expr: RawWindowExpr) -> Result<PlannerResult<RawWindowExpr>> {
276        Ok(PlannerResult::Original(expr))
277    }
278}
279
280/// An operator with two arguments to plan
281///
282/// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST
283/// operator.
284///
285/// This structure is used by [`ExprPlanner`] to plan operators with
286/// custom expressions.
287#[derive(Debug, Clone)]
288pub struct RawBinaryExpr {
289    #[cfg(not(feature = "sql"))]
290    pub op: datafusion_expr_common::operator::Operator,
291    #[cfg(feature = "sql")]
292    pub op: sqlparser::ast::BinaryOperator,
293    pub left: Expr,
294    pub right: Expr,
295}
296
297/// An expression with GetFieldAccess to plan
298///
299/// This structure is used by [`ExprPlanner`] to plan operators with
300/// custom expressions.
301#[derive(Debug, Clone)]
302pub struct RawFieldAccessExpr {
303    pub field_access: GetFieldAccess,
304    pub expr: Expr,
305}
306
307/// A Dictionary literal expression `{ key: value, ...}`
308///
309/// This structure is used by [`ExprPlanner`] to plan operators with
310/// custom expressions.
311#[derive(Debug, Clone)]
312pub struct RawDictionaryExpr {
313    pub keys: Vec<Expr>,
314    pub values: Vec<Expr>,
315}
316
317/// This structure is used by `AggregateFunctionPlanner` to plan operators with
318/// custom expressions.
319#[derive(Debug, Clone)]
320pub struct RawAggregateExpr {
321    pub func: Arc<AggregateUDF>,
322    pub args: Vec<Expr>,
323    pub distinct: bool,
324    pub filter: Option<Box<Expr>>,
325    pub order_by: Vec<SortExpr>,
326    pub null_treatment: Option<NullTreatment>,
327}
328
329/// This structure is used by `WindowFunctionPlanner` to plan operators with
330/// custom expressions.
331#[derive(Debug, Clone)]
332pub struct RawWindowExpr {
333    pub func_def: WindowFunctionDefinition,
334    pub args: Vec<Expr>,
335    pub partition_by: Vec<Expr>,
336    pub order_by: Vec<SortExpr>,
337    pub window_frame: WindowFrame,
338    pub filter: Option<Box<Expr>>,
339    pub null_treatment: Option<NullTreatment>,
340    pub distinct: bool,
341}
342
343/// Result of planning a raw expr with [`ExprPlanner`]
344#[derive(Debug, Clone)]
345pub enum PlannerResult<T> {
346    /// The raw expression was successfully planned as a new [`Expr`]
347    Planned(Expr),
348    /// The raw expression could not be planned, and is returned unmodified
349    Original(T),
350}
351
/// What a [`RelationPlanner`] produces when it plans a relation
#[cfg(feature = "sql")]
#[derive(Debug, Clone)]
pub struct PlannedRelation {
    /// Logical plan representing the relation
    pub plan: LogicalPlan,
    /// Table alias of the relation, if there is one
    pub alias: Option<TableAlias>,
}
361
#[cfg(feature = "sql")]
impl PlannedRelation {
    /// Build a `PlannedRelation` from a logical plan and an optional table alias
    pub fn new(plan: LogicalPlan, alias: Option<TableAlias>) -> Self {
        PlannedRelation { plan, alias }
    }
}
369
/// Outcome of attempting to plan a relation with extension planners
#[cfg(feature = "sql")]
#[derive(Debug)]
pub enum RelationPlanning {
    /// An extension planner handled the relation; carries the planned result
    Planned(Box<PlannedRelation>),
    /// No extension planner handled the relation; it is handed back for
    /// default processing
    Original(Box<TableFactor>),
}
379
/// Customize planning SQL table factors to [`LogicalPlan`]s.
///
/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
///
/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
#[cfg(feature = "sql")]
pub trait RelationPlanner: Debug + Send + Sync {
    /// Plan a table factor into a [`LogicalPlan`].
    ///
    /// Returning [`RelationPlanning::Planned`] short-circuits further planning and uses the
    /// provided plan. Returning [`RelationPlanning::Original`] allows the next registered planner,
    /// or DataFusion's default logic, to handle the relation.
    fn plan_relation(
        &self,
        relation: TableFactor,
        context: &mut dyn RelationPlannerContext,
    ) -> Result<RelationPlanning>;
}
397
/// Utilities that let relation planners interact with DataFusion's SQL
/// planner.
///
/// The utilities offered here are specific to relation planning: converting
/// SQL expressions to logical expressions and normalizing identifiers.
/// Session context is reached through composition, via
/// [`ContextProvider`].
#[cfg(feature = "sql")]
pub trait RelationPlannerContext {
    /// The underlying context provider, for reading session configuration
    /// and accessing tables, functions, and other metadata.
    fn context_provider(&self) -> &dyn ContextProvider;

    /// Run `relation` through the full planner pipeline, starting from the
    /// first registered relation planner.
    fn plan(&mut self, relation: TableFactor) -> Result<LogicalPlan>;

    /// Convert a SQL expression into a logical expression using the current
    /// planner context.
    fn sql_to_expr(&mut self, expr: SQLExpr, schema: &DFSchema) -> Result<Expr>;

    /// Convert a SQL expression into a logical expression without DataFusion
    /// rewrites.
    fn sql_expr_to_logical_expr(
        &mut self,
        expr: SQLExpr,
        schema: &DFSchema,
    ) -> Result<Expr>;

    /// Normalize an identifier according to session settings.
    fn normalize_ident(&self, ident: Ident) -> String;

    /// Normalize a SQL object name into a [`TableReference`].
    fn object_name_to_table_reference(&self, name: ObjectName) -> Result<TableReference>;
}
433
/// Customize planning SQL types to DataFusion (Arrow) types.
///
/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
///
/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
#[cfg(feature = "sql")]
pub trait TypePlanner: Debug + Send + Sync {
    /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
    ///
    /// Returns None if not possible. The default implementation always
    /// returns `Ok(None)`.
    #[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
    fn plan_type(
        &self,
        _sql_type: &sqlparser::ast::DataType,
    ) -> Result<Option<DataType>> {
        Ok(None)
    }

    /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
    ///
    /// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()`
    /// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise
    /// insert metadata into the DataFusion type representation. The default implementation
    /// falls back on [`Self::plan_type`] for backward compatibility and wraps the result
    /// in a nullable field reference.
    fn plan_type_field(
        &self,
        sql_type: &sqlparser::ast::DataType,
    ) -> Result<Option<FieldRef>> {
        // Calling the deprecated method is intentional here: it is the
        // backward-compatibility fallback described above.
        #[expect(deprecated)]
        Ok(self
            .plan_type(sql_type)?
            .map(|data_type| data_type.into_nullable_field_ref()))
    }
}
467}