datafusion_expr/planner.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning
19
20use std::fmt::Debug;
21use std::sync::Arc;
22
23use crate::expr::NullTreatment;
24#[cfg(feature = "sql")]
25use crate::logical_plan::LogicalPlan;
26use crate::{
27 AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame,
28 WindowFunctionDefinition, WindowUDF,
29};
30use arrow::datatypes::{DataType, Field, FieldRef, SchemaRef};
31use datafusion_common::datatype::DataTypeExt;
32use datafusion_common::{
33 DFSchema, Result, TableReference, config::ConfigOptions,
34 file_options::file_type::FileType, not_impl_err,
35};
36#[cfg(feature = "sql")]
37use sqlparser::ast::{Expr as SQLExpr, Ident, ObjectName, TableAlias, TableFactor};
38
39/// Provides the `SQL` query planner meta-data about tables and
40/// functions referenced in SQL statements, without a direct dependency on the
41/// `datafusion` Catalog structures such as [`TableProvider`]
42///
43/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/catalog/trait.TableProvider.html
44pub trait ContextProvider {
45 /// Returns a table by reference, if it exists
46 fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>;
47
48 /// Return the type of a file based on its extension (e.g. `.parquet`)
49 ///
50 /// This is used to plan `COPY` statements
51 fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> {
52 not_impl_err!("Registered file types are not supported")
53 }
54
55 /// Getter for a table function
56 fn get_table_function_source(
57 &self,
58 _name: &str,
59 _args: Vec<Expr>,
60 ) -> Result<Arc<dyn TableSource>> {
61 not_impl_err!("Table Functions are not supported")
62 }
63
64 /// Provides an intermediate table that is used to store the results of a CTE during execution
65 ///
66 /// CTE stands for "Common Table Expression"
67 ///
68 /// # Notes
69 /// We don't directly implement this in [`SqlToRel`] as implementing this function
70 /// often requires access to a table that contains
71 /// execution-related types that can't be a direct dependency
72 /// of the sql crate (for example [`CteWorkTable`]).
73 ///
74 /// The [`ContextProvider`] provides a way to "hide" this dependency.
75 ///
76 /// [`SqlToRel`]: https://docs.rs/datafusion/latest/datafusion/sql/planner/struct.SqlToRel.html
77 /// [`CteWorkTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/cte_worktable/struct.CteWorkTable.html
78 fn create_cte_work_table(
79 &self,
80 _name: &str,
81 _schema: SchemaRef,
82 ) -> Result<Arc<dyn TableSource>> {
83 not_impl_err!("Recursive CTE is not implemented")
84 }
85
86 /// Return [`ExprPlanner`] extensions for planning expressions
87 fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
88 &[]
89 }
90
91 /// Return [`RelationPlanner`] extensions for planning table factors
92 #[cfg(feature = "sql")]
93 fn get_relation_planners(&self) -> &[Arc<dyn RelationPlanner>] {
94 &[]
95 }
96
97 /// Return [`TypePlanner`] extensions for planning data types
98 #[cfg(feature = "sql")]
99 fn get_type_planner(&self) -> Option<Arc<dyn TypePlanner>> {
100 None
101 }
102
103 /// Return the scalar function with a given name, if any
104 fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
105
106 /// Return the aggregate function with a given name, if any
107 fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
108
109 /// Return the window function with a given name, if any
110 fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>;
111
112 /// Return the system/user-defined variable type, if any
113 ///
114 /// A user defined variable is typically accessed via `@var_name`
115 fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
116
117 /// Return metadata about a system/user-defined variable, if any.
118 ///
119 /// By default, this wraps [`Self::get_variable_type`] in an Arrow [`Field`]
120 /// with nullable set to `true` and no metadata. Implementations that can
121 /// provide richer information (such as nullability or extension metadata)
122 /// should override this method.
123 fn get_variable_field(&self, variable_names: &[String]) -> Option<FieldRef> {
124 self.get_variable_type(variable_names)
125 .map(|data_type| data_type.into_nullable_field_ref())
126 }
127
128 /// Return overall configuration options
129 fn options(&self) -> &ConfigOptions;
130
131 /// Return all scalar function names
132 fn udf_names(&self) -> Vec<String>;
133
134 /// Return all aggregate function names
135 fn udaf_names(&self) -> Vec<String>;
136
137 /// Return all window function names
138 fn udwf_names(&self) -> Vec<String>;
139}
140
141/// Customize planning of SQL AST expressions to [`Expr`]s
142///
143/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
144///
145/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
146pub trait ExprPlanner: Debug + Send + Sync {
147 /// Plan the binary operation between two expressions, returns original
148 /// BinaryExpr if not possible
149 fn plan_binary_op(
150 &self,
151 expr: RawBinaryExpr,
152 _schema: &DFSchema,
153 ) -> Result<PlannerResult<RawBinaryExpr>> {
154 Ok(PlannerResult::Original(expr))
155 }
156
157 /// Plan the field access expression, such as `foo.bar`
158 ///
159 /// returns original [`RawFieldAccessExpr`] if not possible
160 fn plan_field_access(
161 &self,
162 expr: RawFieldAccessExpr,
163 _schema: &DFSchema,
164 ) -> Result<PlannerResult<RawFieldAccessExpr>> {
165 Ok(PlannerResult::Original(expr))
166 }
167
168 /// Plan an array literal, such as `[1, 2, 3]`
169 ///
170 /// Returns original expression arguments if not possible
171 fn plan_array_literal(
172 &self,
173 exprs: Vec<Expr>,
174 _schema: &DFSchema,
175 ) -> Result<PlannerResult<Vec<Expr>>> {
176 Ok(PlannerResult::Original(exprs))
177 }
178
179 /// Plan a `POSITION` expression, such as `POSITION(<expr> in <expr>)`
180 ///
181 /// Returns original expression arguments if not possible
182 fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
183 Ok(PlannerResult::Original(args))
184 }
185
186 /// Plan a dictionary literal, such as `{ key: value, ...}`
187 ///
188 /// Returns original expression arguments if not possible
189 fn plan_dictionary_literal(
190 &self,
191 expr: RawDictionaryExpr,
192 _schema: &DFSchema,
193 ) -> Result<PlannerResult<RawDictionaryExpr>> {
194 Ok(PlannerResult::Original(expr))
195 }
196
197 /// Plan an extract expression, such as`EXTRACT(month FROM foo)`
198 ///
199 /// Returns original expression arguments if not possible
200 fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
201 Ok(PlannerResult::Original(args))
202 }
203
204 /// Plan an substring expression, such as `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])`
205 ///
206 /// Returns original expression arguments if not possible
207 fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
208 Ok(PlannerResult::Original(args))
209 }
210
211 /// Plans a struct literal, such as `{'field1' : expr1, 'field2' : expr2, ...}`
212 ///
213 /// This function takes a vector of expressions and a boolean flag
214 /// indicating whether the struct uses the optional name
215 ///
216 /// Returns the original input expressions if planning is not possible.
217 fn plan_struct_literal(
218 &self,
219 args: Vec<Expr>,
220 _is_named_struct: bool,
221 ) -> Result<PlannerResult<Vec<Expr>>> {
222 Ok(PlannerResult::Original(args))
223 }
224
225 /// Plans an overlay expression, such as `overlay(str PLACING substr FROM pos [FOR count])`
226 ///
227 /// Returns original expression arguments if not possible
228 fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
229 Ok(PlannerResult::Original(args))
230 }
231
232 /// Plans a `make_map` expression, such as `make_map(key1, value1, key2, value2, ...)`
233 ///
234 /// Returns original expression arguments if not possible
235 fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
236 Ok(PlannerResult::Original(args))
237 }
238
239 /// Plans compound identifier such as `db.schema.table` for non-empty nested names
240 ///
241 /// # Note:
242 /// Currently compound identifier for outer query schema is not supported.
243 ///
244 /// Returns original expression if not possible
245 fn plan_compound_identifier(
246 &self,
247 _field: &Field,
248 _qualifier: Option<&TableReference>,
249 _nested_names: &[String],
250 ) -> Result<PlannerResult<Vec<Expr>>> {
251 not_impl_err!(
252 "Default planner compound identifier hasn't been implemented for ExprPlanner"
253 )
254 }
255
256 /// Plans aggregate functions, such as `COUNT(<expr>)`
257 ///
258 /// Returns original expression arguments if not possible
259 fn plan_aggregate(
260 &self,
261 expr: RawAggregateExpr,
262 ) -> Result<PlannerResult<RawAggregateExpr>> {
263 Ok(PlannerResult::Original(expr))
264 }
265
266 /// Plans window functions, such as `COUNT(<expr>)`
267 ///
268 /// Returns original expression arguments if not possible
269 fn plan_window(&self, expr: RawWindowExpr) -> Result<PlannerResult<RawWindowExpr>> {
270 Ok(PlannerResult::Original(expr))
271 }
272}
273
274/// An operator with two arguments to plan
275///
276/// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST
277/// operator.
278///
279/// This structure is used by [`ExprPlanner`] to plan operators with
280/// custom expressions.
281#[derive(Debug, Clone)]
282pub struct RawBinaryExpr {
283 #[cfg(not(feature = "sql"))]
284 pub op: datafusion_expr_common::operator::Operator,
285 #[cfg(feature = "sql")]
286 pub op: sqlparser::ast::BinaryOperator,
287 pub left: Expr,
288 pub right: Expr,
289}
290
291/// An expression with GetFieldAccess to plan
292///
293/// This structure is used by [`ExprPlanner`] to plan operators with
294/// custom expressions.
295#[derive(Debug, Clone)]
296pub struct RawFieldAccessExpr {
297 pub field_access: GetFieldAccess,
298 pub expr: Expr,
299}
300
301/// A Dictionary literal expression `{ key: value, ...}`
302///
303/// This structure is used by [`ExprPlanner`] to plan operators with
304/// custom expressions.
305#[derive(Debug, Clone)]
306pub struct RawDictionaryExpr {
307 pub keys: Vec<Expr>,
308 pub values: Vec<Expr>,
309}
310
311/// This structure is used by `AggregateFunctionPlanner` to plan operators with
312/// custom expressions.
313#[derive(Debug, Clone)]
314pub struct RawAggregateExpr {
315 pub func: Arc<AggregateUDF>,
316 pub args: Vec<Expr>,
317 pub distinct: bool,
318 pub filter: Option<Box<Expr>>,
319 pub order_by: Vec<SortExpr>,
320 pub null_treatment: Option<NullTreatment>,
321}
322
323/// This structure is used by `WindowFunctionPlanner` to plan operators with
324/// custom expressions.
325#[derive(Debug, Clone)]
326pub struct RawWindowExpr {
327 pub func_def: WindowFunctionDefinition,
328 pub args: Vec<Expr>,
329 pub partition_by: Vec<Expr>,
330 pub order_by: Vec<SortExpr>,
331 pub window_frame: WindowFrame,
332 pub filter: Option<Box<Expr>>,
333 pub null_treatment: Option<NullTreatment>,
334 pub distinct: bool,
335}
336
337/// Result of planning a raw expr with [`ExprPlanner`]
338#[derive(Debug, Clone)]
339pub enum PlannerResult<T> {
340 /// The raw expression was successfully planned as a new [`Expr`]
341 Planned(Expr),
342 /// The raw expression could not be planned, and is returned unmodified
343 Original(T),
344}
345
/// Outcome of successfully planning a relation with a [`RelationPlanner`]
#[cfg(feature = "sql")]
#[derive(Clone, Debug)]
pub struct PlannedRelation {
    /// Logical plan produced for the relation
    pub plan: LogicalPlan,
    /// Table alias of the relation, if any
    pub alias: Option<TableAlias>,
}
355
#[cfg(feature = "sql")]
impl PlannedRelation {
    /// Build a `PlannedRelation` from the given plan and (optional) alias
    pub fn new(plan: LogicalPlan, alias: Option<TableAlias>) -> Self {
        PlannedRelation { plan, alias }
    }
}
363
/// Outcome of offering a relation to the extension planners
#[cfg(feature = "sql")]
#[derive(Debug)]
pub enum RelationPlanning {
    /// An extension planner handled the relation and produced this result
    Planned(Box<PlannedRelation>),
    /// No extension planner handled the relation; it is handed back for
    /// default processing
    Original(Box<TableFactor>),
}
373
/// Customize planning SQL table factors to [`LogicalPlan`]s.
///
/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
///
/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
#[cfg(feature = "sql")]
pub trait RelationPlanner: Debug + Send + Sync {
    /// Plan a table factor into a [`LogicalPlan`].
    ///
    /// Takes ownership of `relation`; `context` gives access to the SQL
    /// planner utilities of [`RelationPlannerContext`].
    ///
    /// Returning [`RelationPlanning::Planned`] short-circuits further planning and uses the
    /// provided plan. Returning [`RelationPlanning::Original`] allows the next registered planner,
    /// or DataFusion's default logic, to handle the relation.
    fn plan_relation(
        &self,
        relation: TableFactor,
        context: &mut dyn RelationPlannerContext,
    ) -> Result<RelationPlanning>;
}
391
/// Utilities that let relation planners interact with DataFusion's SQL
/// planner.
///
/// The utilities offered here are specific to relation planning, such as
/// converting SQL expressions to logical expressions and normalizing
/// identifiers. Session context is reached by composition, through
/// [`ContextProvider`].
#[cfg(feature = "sql")]
pub trait RelationPlannerContext {
    /// Gives access to the underlying context provider, which is used to read
    /// session configuration and to access tables, functions, and other
    /// metadata.
    fn context_provider(&self) -> &dyn ContextProvider;

    /// Runs the given relation through the full planner pipeline, beginning
    /// with the first registered relation planner.
    fn plan(&mut self, relation: TableFactor) -> Result<LogicalPlan>;

    /// Turns a SQL expression into a logical expression, using the current
    /// planner context.
    fn sql_to_expr(&mut self, expr: SQLExpr, schema: &DFSchema) -> Result<Expr>;

    /// Turns a SQL expression into a logical expression, skipping DataFusion
    /// rewrites.
    fn sql_expr_to_logical_expr(
        &mut self,
        expr: SQLExpr,
        schema: &DFSchema,
    ) -> Result<Expr>;

    /// Applies the session settings to normalize an identifier.
    fn normalize_ident(&self, ident: Ident) -> String;

    /// Turns a SQL object name into a normalized [`TableReference`].
    fn object_name_to_table_reference(&self, name: ObjectName) -> Result<TableReference>;
}
427
/// Customize planning SQL types to DataFusion (Arrow) types.
///
/// For more background, please also see the [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]
///
/// [Extending SQL in DataFusion: from ->> to TABLESAMPLE blog]: https://datafusion.apache.org/blog/2026/01/12/extending-sql
#[cfg(feature = "sql")]
pub trait TypePlanner: Debug + Send + Sync {
    /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
    ///
    /// Returns `Ok(None)` if not possible, which is also what the default
    /// implementation does for every input.
    fn plan_type(
        &self,
        _sql_type: &sqlparser::ast::DataType,
    ) -> Result<Option<DataType>> {
        Ok(None)
    }
}
443}