datafusion_expr/planner.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ContextProvider`] and [`ExprPlanner`] APIs to customize SQL query planning
19
20use std::fmt::Debug;
21use std::sync::Arc;
22
23use crate::expr::NullTreatment;
24use crate::{
25 AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame,
26 WindowFunctionDefinition, WindowUDF,
27};
28use arrow::datatypes::{DataType, Field, SchemaRef};
29use datafusion_common::{
30 config::ConfigOptions, file_options::file_type::FileType, not_impl_err, DFSchema,
31 Result, TableReference,
32};
33
34/// Provides the `SQL` query planner meta-data about tables and
35/// functions referenced in SQL statements, without a direct dependency on the
36/// `datafusion` Catalog structures such as [`TableProvider`]
37///
38/// [`TableProvider`]: https://docs.rs/datafusion/latest/datafusion/catalog/trait.TableProvider.html
39pub trait ContextProvider {
40 /// Returns a table by reference, if it exists
41 fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>>;
42
43 /// Return the type of a file based on its extension (e.g. `.parquet`)
44 ///
45 /// This is used to plan `COPY` statements
46 fn get_file_type(&self, _ext: &str) -> Result<Arc<dyn FileType>> {
47 not_impl_err!("Registered file types are not supported")
48 }
49
50 /// Getter for a table function
51 fn get_table_function_source(
52 &self,
53 _name: &str,
54 _args: Vec<Expr>,
55 ) -> Result<Arc<dyn TableSource>> {
56 not_impl_err!("Table Functions are not supported")
57 }
58
59 /// Provides an intermediate table that is used to store the results of a CTE during execution
60 ///
61 /// CTE stands for "Common Table Expression"
62 ///
63 /// # Notes
64 /// We don't directly implement this in [`SqlToRel`] as implementing this function
65 /// often requires access to a table that contains
66 /// execution-related types that can't be a direct dependency
67 /// of the sql crate (for example [`CteWorkTable`]).
68 ///
69 /// The [`ContextProvider`] provides a way to "hide" this dependency.
70 ///
71 /// [`SqlToRel`]: https://docs.rs/datafusion/latest/datafusion/sql/planner/struct.SqlToRel.html
72 /// [`CteWorkTable`]: https://docs.rs/datafusion/latest/datafusion/datasource/cte_worktable/struct.CteWorkTable.html
73 fn create_cte_work_table(
74 &self,
75 _name: &str,
76 _schema: SchemaRef,
77 ) -> Result<Arc<dyn TableSource>> {
78 not_impl_err!("Recursive CTE is not implemented")
79 }
80
81 /// Return [`ExprPlanner`] extensions for planning expressions
82 fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
83 &[]
84 }
85
86 /// Return [`TypePlanner`] extensions for planning data types
87 #[cfg(feature = "sql")]
88 fn get_type_planner(&self) -> Option<Arc<dyn TypePlanner>> {
89 None
90 }
91
92 /// Return the scalar function with a given name, if any
93 fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
94
95 /// Return the aggregate function with a given name, if any
96 fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
97
98 /// Return the window function with a given name, if any
99 fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>>;
100
101 /// Return the system/user-defined variable type, if any
102 ///
103 /// A user defined variable is typically accessed via `@var_name`
104 fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
105
106 /// Return overall configuration options
107 fn options(&self) -> &ConfigOptions;
108
109 /// Return all scalar function names
110 fn udf_names(&self) -> Vec<String>;
111
112 /// Return all aggregate function names
113 fn udaf_names(&self) -> Vec<String>;
114
115 /// Return all window function names
116 fn udwf_names(&self) -> Vec<String>;
117}
118
119/// Customize planning of SQL AST expressions to [`Expr`]s
120pub trait ExprPlanner: Debug + Send + Sync {
121 /// Plan the binary operation between two expressions, returns original
122 /// BinaryExpr if not possible
123 fn plan_binary_op(
124 &self,
125 expr: RawBinaryExpr,
126 _schema: &DFSchema,
127 ) -> Result<PlannerResult<RawBinaryExpr>> {
128 Ok(PlannerResult::Original(expr))
129 }
130
131 /// Plan the field access expression, such as `foo.bar`
132 ///
133 /// returns original [`RawFieldAccessExpr`] if not possible
134 fn plan_field_access(
135 &self,
136 expr: RawFieldAccessExpr,
137 _schema: &DFSchema,
138 ) -> Result<PlannerResult<RawFieldAccessExpr>> {
139 Ok(PlannerResult::Original(expr))
140 }
141
142 /// Plan an array literal, such as `[1, 2, 3]`
143 ///
144 /// Returns original expression arguments if not possible
145 fn plan_array_literal(
146 &self,
147 exprs: Vec<Expr>,
148 _schema: &DFSchema,
149 ) -> Result<PlannerResult<Vec<Expr>>> {
150 Ok(PlannerResult::Original(exprs))
151 }
152
153 /// Plan a `POSITION` expression, such as `POSITION(<expr> in <expr>)`
154 ///
155 /// Returns original expression arguments if not possible
156 fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
157 Ok(PlannerResult::Original(args))
158 }
159
160 /// Plan a dictionary literal, such as `{ key: value, ...}`
161 ///
162 /// Returns original expression arguments if not possible
163 fn plan_dictionary_literal(
164 &self,
165 expr: RawDictionaryExpr,
166 _schema: &DFSchema,
167 ) -> Result<PlannerResult<RawDictionaryExpr>> {
168 Ok(PlannerResult::Original(expr))
169 }
170
171 /// Plan an extract expression, such as`EXTRACT(month FROM foo)`
172 ///
173 /// Returns original expression arguments if not possible
174 fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
175 Ok(PlannerResult::Original(args))
176 }
177
178 /// Plan an substring expression, such as `SUBSTRING(<expr> [FROM <expr>] [FOR <expr>])`
179 ///
180 /// Returns original expression arguments if not possible
181 fn plan_substring(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
182 Ok(PlannerResult::Original(args))
183 }
184
185 /// Plans a struct literal, such as `{'field1' : expr1, 'field2' : expr2, ...}`
186 ///
187 /// This function takes a vector of expressions and a boolean flag
188 /// indicating whether the struct uses the optional name
189 ///
190 /// Returns the original input expressions if planning is not possible.
191 fn plan_struct_literal(
192 &self,
193 args: Vec<Expr>,
194 _is_named_struct: bool,
195 ) -> Result<PlannerResult<Vec<Expr>>> {
196 Ok(PlannerResult::Original(args))
197 }
198
199 /// Plans an overlay expression, such as `overlay(str PLACING substr FROM pos [FOR count])`
200 ///
201 /// Returns original expression arguments if not possible
202 fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
203 Ok(PlannerResult::Original(args))
204 }
205
206 /// Plans a `make_map` expression, such as `make_map(key1, value1, key2, value2, ...)`
207 ///
208 /// Returns original expression arguments if not possible
209 fn plan_make_map(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
210 Ok(PlannerResult::Original(args))
211 }
212
213 /// Plans compound identifier such as `db.schema.table` for non-empty nested names
214 ///
215 /// # Note:
216 /// Currently compound identifier for outer query schema is not supported.
217 ///
218 /// Returns original expression if not possible
219 fn plan_compound_identifier(
220 &self,
221 _field: &Field,
222 _qualifier: Option<&TableReference>,
223 _nested_names: &[String],
224 ) -> Result<PlannerResult<Vec<Expr>>> {
225 not_impl_err!(
226 "Default planner compound identifier hasn't been implemented for ExprPlanner"
227 )
228 }
229
230 /// Plans `ANY` expression, such as `expr = ANY(array_expr)`
231 ///
232 /// Returns origin binary expression if not possible
233 fn plan_any(&self, expr: RawBinaryExpr) -> Result<PlannerResult<RawBinaryExpr>> {
234 Ok(PlannerResult::Original(expr))
235 }
236
237 /// Plans aggregate functions, such as `COUNT(<expr>)`
238 ///
239 /// Returns original expression arguments if not possible
240 fn plan_aggregate(
241 &self,
242 expr: RawAggregateExpr,
243 ) -> Result<PlannerResult<RawAggregateExpr>> {
244 Ok(PlannerResult::Original(expr))
245 }
246
247 /// Plans window functions, such as `COUNT(<expr>)`
248 ///
249 /// Returns original expression arguments if not possible
250 fn plan_window(&self, expr: RawWindowExpr) -> Result<PlannerResult<RawWindowExpr>> {
251 Ok(PlannerResult::Original(expr))
252 }
253}
254
255/// An operator with two arguments to plan
256///
257/// Note `left` and `right` are DataFusion [`Expr`]s but the `op` is the SQL AST
258/// operator.
259///
260/// This structure is used by [`ExprPlanner`] to plan operators with
261/// custom expressions.
262#[derive(Debug, Clone)]
263pub struct RawBinaryExpr {
264 #[cfg(not(feature = "sql"))]
265 pub op: datafusion_expr_common::operator::Operator,
266 #[cfg(feature = "sql")]
267 pub op: sqlparser::ast::BinaryOperator,
268 pub left: Expr,
269 pub right: Expr,
270}
271
272/// An expression with GetFieldAccess to plan
273///
274/// This structure is used by [`ExprPlanner`] to plan operators with
275/// custom expressions.
276#[derive(Debug, Clone)]
277pub struct RawFieldAccessExpr {
278 pub field_access: GetFieldAccess,
279 pub expr: Expr,
280}
281
282/// A Dictionary literal expression `{ key: value, ...}`
283///
284/// This structure is used by [`ExprPlanner`] to plan operators with
285/// custom expressions.
286#[derive(Debug, Clone)]
287pub struct RawDictionaryExpr {
288 pub keys: Vec<Expr>,
289 pub values: Vec<Expr>,
290}
291
292/// This structure is used by `AggregateFunctionPlanner` to plan operators with
293/// custom expressions.
294#[derive(Debug, Clone)]
295pub struct RawAggregateExpr {
296 pub func: Arc<AggregateUDF>,
297 pub args: Vec<Expr>,
298 pub distinct: bool,
299 pub filter: Option<Box<Expr>>,
300 pub order_by: Vec<SortExpr>,
301 pub null_treatment: Option<NullTreatment>,
302}
303
304/// This structure is used by `WindowFunctionPlanner` to plan operators with
305/// custom expressions.
306#[derive(Debug, Clone)]
307pub struct RawWindowExpr {
308 pub func_def: WindowFunctionDefinition,
309 pub args: Vec<Expr>,
310 pub partition_by: Vec<Expr>,
311 pub order_by: Vec<SortExpr>,
312 pub window_frame: WindowFrame,
313 pub filter: Option<Box<Expr>>,
314 pub null_treatment: Option<NullTreatment>,
315 pub distinct: bool,
316}
317
318/// Result of planning a raw expr with [`ExprPlanner`]
319#[derive(Debug, Clone)]
320pub enum PlannerResult<T> {
321 /// The raw expression was successfully planned as a new [`Expr`]
322 Planned(Expr),
323 /// The raw expression could not be planned, and is returned unmodified
324 Original(T),
325}
326
/// Customize how SQL types are planned into DataFusion (Arrow) types.
///
/// Only available with the `sql` feature.
#[cfg(feature = "sql")]
pub trait TypePlanner: Debug + Send + Sync {
    /// Plans a SQL [`sqlparser::ast::DataType`] as a DataFusion [`DataType`].
    ///
    /// Yields `Ok(None)` when the type cannot be planned here, which is what
    /// the default implementation always does.
    fn plan_type(
        &self,
        _sql_type: &sqlparser::ast::DataType,
    ) -> Result<Option<DataType>> {
        Ok(None)
    }
}
339}