datafusion_sql/expr/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::{DataType, TimeUnit};
19use datafusion_expr::planner::{
20    PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
21};
22use sqlparser::ast::{
23    AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType,
24    DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry,
25    StructField, Subscript, TrimWhereField, Value, ValueWithSpan,
26};
27
28use datafusion_common::{
29    internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
30    ScalarValue,
31};
32
33use datafusion_expr::expr::ScalarFunction;
34use datafusion_expr::expr::{InList, WildcardOptions};
35use datafusion_expr::{
36    lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal,
37    Operator, TryCast,
38};
39
40use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
41
42mod binary_op;
43mod function;
44mod grouping_set;
45mod identifier;
46mod order_by;
47mod subquery;
48mod substring;
49mod unary_op;
50mod value;
51
52impl<S: ContextProvider> SqlToRel<'_, S> {
53    pub(crate) fn sql_expr_to_logical_expr_with_alias(
54        &self,
55        sql: SQLExprWithAlias,
56        schema: &DFSchema,
57        planner_context: &mut PlannerContext,
58    ) -> Result<Expr> {
59        let mut expr =
60            self.sql_expr_to_logical_expr(sql.expr, schema, planner_context)?;
61        if let Some(alias) = sql.alias {
62            expr = expr.alias(alias.value);
63        }
64        Ok(expr)
65    }
66    pub(crate) fn sql_expr_to_logical_expr(
67        &self,
68        sql: SQLExpr,
69        schema: &DFSchema,
70        planner_context: &mut PlannerContext,
71    ) -> Result<Expr> {
72        enum StackEntry {
73            SQLExpr(Box<SQLExpr>),
74            Operator(BinaryOperator),
75        }
76
77        // Virtual stack machine to convert SQLExpr to Expr
78        // This allows visiting the expr tree in a depth-first manner which
79        // produces expressions in postfix notations, i.e. `a + b` => `a b +`.
80        // See https://github.com/apache/datafusion/issues/1444
81        let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
82        let mut eval_stack = vec![];
83
84        while let Some(entry) = stack.pop() {
85            match entry {
86                StackEntry::SQLExpr(sql_expr) => {
87                    match *sql_expr {
88                        SQLExpr::BinaryOp { left, op, right } => {
89                            // Note the order that we push the entries to the stack
90                            // is important. We want to visit the left node first.
91                            stack.push(StackEntry::Operator(op));
92                            stack.push(StackEntry::SQLExpr(right));
93                            stack.push(StackEntry::SQLExpr(left));
94                        }
95                        _ => {
96                            let expr = self.sql_expr_to_logical_expr_internal(
97                                *sql_expr,
98                                schema,
99                                planner_context,
100                            )?;
101                            eval_stack.push(expr);
102                        }
103                    }
104                }
105                StackEntry::Operator(op) => {
106                    let right = eval_stack.pop().unwrap();
107                    let left = eval_stack.pop().unwrap();
108                    let expr = self.build_logical_expr(op, left, right, schema)?;
109                    eval_stack.push(expr);
110                }
111            }
112        }
113
114        assert_eq!(1, eval_stack.len());
115        let expr = eval_stack.pop().unwrap();
116        Ok(expr)
117    }
118
119    fn build_logical_expr(
120        &self,
121        op: BinaryOperator,
122        left: Expr,
123        right: Expr,
124        schema: &DFSchema,
125    ) -> Result<Expr> {
126        // try extension planers
127        let mut binary_expr = RawBinaryExpr { op, left, right };
128        for planner in self.context_provider.get_expr_planners() {
129            match planner.plan_binary_op(binary_expr, schema)? {
130                PlannerResult::Planned(expr) => {
131                    return Ok(expr);
132                }
133                PlannerResult::Original(expr) => {
134                    binary_expr = expr;
135                }
136            }
137        }
138
139        let RawBinaryExpr { op, left, right } = binary_expr;
140        Ok(Expr::BinaryExpr(BinaryExpr::new(
141            Box::new(left),
142            self.parse_sql_binary_op(op)?,
143            Box::new(right),
144        )))
145    }
146
147    pub fn sql_to_expr_with_alias(
148        &self,
149        sql: SQLExprWithAlias,
150        schema: &DFSchema,
151        planner_context: &mut PlannerContext,
152    ) -> Result<Expr> {
153        let mut expr =
154            self.sql_expr_to_logical_expr_with_alias(sql, schema, planner_context)?;
155        expr = self.rewrite_partial_qualifier(expr, schema);
156        self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
157        let (expr, _) = expr.infer_placeholder_types(schema)?;
158        Ok(expr)
159    }
160
161    /// Generate a relational expression from a SQL expression
162    pub fn sql_to_expr(
163        &self,
164        sql: SQLExpr,
165        schema: &DFSchema,
166        planner_context: &mut PlannerContext,
167    ) -> Result<Expr> {
168        // The location of the original SQL expression in the source code
169        let mut expr = self.sql_expr_to_logical_expr(sql, schema, planner_context)?;
170        expr = self.rewrite_partial_qualifier(expr, schema);
171        self.validate_schema_satisfies_exprs(schema, std::slice::from_ref(&expr))?;
172        let (expr, _) = expr.infer_placeholder_types(schema)?;
173        Ok(expr)
174    }
175
176    /// Rewrite aliases which are not-complete (e.g. ones that only include only table qualifier in a schema.table qualified relation)
177    fn rewrite_partial_qualifier(&self, expr: Expr, schema: &DFSchema) -> Expr {
178        match expr {
179            Expr::Column(col) => match &col.relation {
180                Some(q) => {
181                    match schema.iter().find(|(qualifier, field)| match qualifier {
182                        Some(field_q) => {
183                            field.name() == &col.name
184                                && field_q.to_string().ends_with(&format!(".{q}"))
185                        }
186                        _ => false,
187                    }) {
188                        Some((qualifier, df_field)) => Expr::from((qualifier, df_field)),
189                        None => Expr::Column(col),
190                    }
191                }
192                None => Expr::Column(col),
193            },
194            _ => expr,
195        }
196    }
197
198    /// Internal implementation. Use
199    /// [`Self::sql_expr_to_logical_expr`] to plan exprs.
200    #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
201    fn sql_expr_to_logical_expr_internal(
202        &self,
203        sql: SQLExpr,
204        schema: &DFSchema,
205        planner_context: &mut PlannerContext,
206    ) -> Result<Expr> {
207        // NOTE: This function is called recursively, so each match arm body should be as
208        //       small as possible to decrease stack requirement.
209        //       Follow the common pattern of extracting into a separate function for
210        //       non-trivial arms. See https://github.com/apache/datafusion/pull/12384 for
211        //       more context.
212        match sql {
213            SQLExpr::Value(value) => {
214                self.parse_value(value.into(), planner_context.prepare_param_data_types())
215            }
216            SQLExpr::Extract { field, expr, .. } => {
217                let mut extract_args = vec![
218                    Expr::Literal(ScalarValue::from(format!("{field}")), None),
219                    self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
220                ];
221
222                for planner in self.context_provider.get_expr_planners() {
223                    match planner.plan_extract(extract_args)? {
224                        PlannerResult::Planned(expr) => return Ok(expr),
225                        PlannerResult::Original(args) => {
226                            extract_args = args;
227                        }
228                    }
229                }
230
231                not_impl_err!("Extract not supported by ExprPlanner: {extract_args:?}")
232            }
233
234            SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema),
235            SQLExpr::Interval(interval) => self.sql_interval_to_expr(false, interval),
236            SQLExpr::Identifier(id) => {
237                self.sql_identifier_to_expr(id, schema, planner_context)
238            }
239
240            // <expr>["foo"], <expr>[4] or <expr>[4:5]
241            SQLExpr::CompoundFieldAccess { root, access_chain } => self
242                .sql_compound_field_access_to_expr(
243                    *root,
244                    access_chain,
245                    schema,
246                    planner_context,
247                ),
248
249            SQLExpr::CompoundIdentifier(ids) => {
250                self.sql_compound_identifier_to_expr(ids, schema, planner_context)
251            }
252
253            SQLExpr::Case {
254                operand,
255                conditions,
256                else_result,
257            } => self.sql_case_identifier_to_expr(
258                operand,
259                conditions,
260                else_result,
261                schema,
262                planner_context,
263            ),
264
265            SQLExpr::Cast {
266                kind: CastKind::Cast | CastKind::DoubleColon,
267                expr,
268                data_type,
269                format,
270            } => self.sql_cast_to_expr(*expr, data_type, format, schema, planner_context),
271
272            SQLExpr::Cast {
273                kind: CastKind::TryCast | CastKind::SafeCast,
274                expr,
275                data_type,
276                format,
277            } => {
278                if let Some(format) = format {
279                    return not_impl_err!("CAST with format is not supported: {format}");
280                }
281
282                Ok(Expr::TryCast(TryCast::new(
283                    Box::new(self.sql_expr_to_logical_expr(
284                        *expr,
285                        schema,
286                        planner_context,
287                    )?),
288                    self.convert_data_type(&data_type)?,
289                )))
290            }
291
292            SQLExpr::TypedString { data_type, value } => Ok(Expr::Cast(Cast::new(
293                Box::new(lit(value.into_string().unwrap())),
294                self.convert_data_type(&data_type)?,
295            ))),
296
297            SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
298                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
299            ))),
300
301            SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new(
302                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
303            ))),
304
305            SQLExpr::IsDistinctFrom(left, right) => {
306                Ok(Expr::BinaryExpr(BinaryExpr::new(
307                    Box::new(self.sql_expr_to_logical_expr(
308                        *left,
309                        schema,
310                        planner_context,
311                    )?),
312                    Operator::IsDistinctFrom,
313                    Box::new(self.sql_expr_to_logical_expr(
314                        *right,
315                        schema,
316                        planner_context,
317                    )?),
318                )))
319            }
320
321            SQLExpr::IsNotDistinctFrom(left, right) => {
322                Ok(Expr::BinaryExpr(BinaryExpr::new(
323                    Box::new(self.sql_expr_to_logical_expr(
324                        *left,
325                        schema,
326                        planner_context,
327                    )?),
328                    Operator::IsNotDistinctFrom,
329                    Box::new(self.sql_expr_to_logical_expr(
330                        *right,
331                        schema,
332                        planner_context,
333                    )?),
334                )))
335            }
336
337            SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new(
338                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
339            ))),
340
341            SQLExpr::IsFalse(expr) => Ok(Expr::IsFalse(Box::new(
342                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
343            ))),
344
345            SQLExpr::IsNotTrue(expr) => Ok(Expr::IsNotTrue(Box::new(
346                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
347            ))),
348
349            SQLExpr::IsNotFalse(expr) => Ok(Expr::IsNotFalse(Box::new(
350                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
351            ))),
352
353            SQLExpr::IsUnknown(expr) => Ok(Expr::IsUnknown(Box::new(
354                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
355            ))),
356
357            SQLExpr::IsNotUnknown(expr) => Ok(Expr::IsNotUnknown(Box::new(
358                self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
359            ))),
360
361            SQLExpr::UnaryOp { op, expr } => {
362                self.parse_sql_unary_op(op, *expr, schema, planner_context)
363            }
364
365            SQLExpr::Between {
366                expr,
367                negated,
368                low,
369                high,
370            } => Ok(Expr::Between(Between::new(
371                Box::new(self.sql_expr_to_logical_expr(
372                    *expr,
373                    schema,
374                    planner_context,
375                )?),
376                negated,
377                Box::new(self.sql_expr_to_logical_expr(*low, schema, planner_context)?),
378                Box::new(self.sql_expr_to_logical_expr(
379                    *high,
380                    schema,
381                    planner_context,
382                )?),
383            ))),
384
385            SQLExpr::InList {
386                expr,
387                list,
388                negated,
389            } => self.sql_in_list_to_expr(*expr, list, negated, schema, planner_context),
390
391            SQLExpr::Like {
392                negated,
393                expr,
394                pattern,
395                escape_char,
396                any,
397            } => self.sql_like_to_expr(
398                negated,
399                *expr,
400                *pattern,
401                escape_char,
402                schema,
403                planner_context,
404                false,
405                any,
406            ),
407
408            SQLExpr::ILike {
409                negated,
410                expr,
411                pattern,
412                escape_char,
413                any,
414            } => self.sql_like_to_expr(
415                negated,
416                *expr,
417                *pattern,
418                escape_char,
419                schema,
420                planner_context,
421                true,
422                any,
423            ),
424
425            SQLExpr::SimilarTo {
426                negated,
427                expr,
428                pattern,
429                escape_char,
430            } => self.sql_similarto_to_expr(
431                negated,
432                *expr,
433                *pattern,
434                escape_char,
435                schema,
436                planner_context,
437            ),
438
439            SQLExpr::BinaryOp { .. } => {
440                internal_err!("binary_op should be handled by sql_expr_to_logical_expr.")
441            }
442
443            #[cfg(feature = "unicode_expressions")]
444            SQLExpr::Substring {
445                expr,
446                substring_from,
447                substring_for,
448                special: _,
449            } => self.sql_substring_to_expr(
450                expr,
451                substring_from,
452                substring_for,
453                schema,
454                planner_context,
455            ),
456
457            #[cfg(not(feature = "unicode_expressions"))]
458            SQLExpr::Substring { .. } => {
459                internal_err!(
460                    "statement substring requires compilation with feature flag: unicode_expressions."
461                )
462            }
463
464            SQLExpr::Trim {
465                expr,
466                trim_where,
467                trim_what,
468                trim_characters,
469            } => self.sql_trim_to_expr(
470                *expr,
471                trim_where,
472                trim_what,
473                trim_characters,
474                schema,
475                planner_context,
476            ),
477
478            SQLExpr::Function(function) => {
479                self.sql_function_to_expr(function, schema, planner_context)
480            }
481
482            SQLExpr::Rollup(exprs) => {
483                self.sql_rollup_to_expr(exprs, schema, planner_context)
484            }
485            SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
486            SQLExpr::GroupingSets(exprs) => {
487                self.sql_grouping_sets_to_expr(exprs, schema, planner_context)
488            }
489
490            SQLExpr::Floor {
491                expr,
492                field: _field,
493            } => self.sql_fn_name_to_expr(*expr, "floor", schema, planner_context),
494            SQLExpr::Ceil {
495                expr,
496                field: _field,
497            } => self.sql_fn_name_to_expr(*expr, "ceil", schema, planner_context),
498            SQLExpr::Overlay {
499                expr,
500                overlay_what,
501                overlay_from,
502                overlay_for,
503            } => self.sql_overlay_to_expr(
504                *expr,
505                *overlay_what,
506                *overlay_from,
507                overlay_for,
508                schema,
509                planner_context,
510            ),
511            SQLExpr::Nested(e) => {
512                self.sql_expr_to_logical_expr(*e, schema, planner_context)
513            }
514
515            SQLExpr::Exists { subquery, negated } => {
516                self.parse_exists_subquery(*subquery, negated, schema, planner_context)
517            }
518            SQLExpr::InSubquery {
519                expr,
520                subquery,
521                negated,
522            } => {
523                self.parse_in_subquery(*expr, *subquery, negated, schema, planner_context)
524            }
525            SQLExpr::Subquery(subquery) => {
526                self.parse_scalar_subquery(*subquery, schema, planner_context)
527            }
528
529            SQLExpr::Struct { values, fields } => {
530                self.parse_struct(schema, planner_context, values, fields)
531            }
532            SQLExpr::Position { expr, r#in } => {
533                self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
534            }
535            SQLExpr::AtTimeZone {
536                timestamp,
537                time_zone,
538            } => Ok(Expr::Cast(Cast::new(
539                Box::new(self.sql_expr_to_logical_expr_internal(
540                    *timestamp,
541                    schema,
542                    planner_context,
543                )?),
544                match *time_zone {
545                    SQLExpr::Value(ValueWithSpan {
546                        value: Value::SingleQuotedString(s),
547                        span: _,
548                    }) => DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())),
549                    _ => {
550                        return not_impl_err!(
551                            "Unsupported ast node in sqltorel: {time_zone:?}"
552                        )
553                    }
554                },
555            ))),
556            SQLExpr::Dictionary(fields) => {
557                self.try_plan_dictionary_literal(fields, schema, planner_context)
558            }
559            SQLExpr::Map(map) => {
560                self.try_plan_map_literal(map.entries, schema, planner_context)
561            }
562            SQLExpr::AnyOp {
563                left,
564                compare_op,
565                right,
566                // ANY/SOME are equivalent, this field specifies which the user
567                // specified but it doesn't affect the plan so ignore the field
568                is_some: _,
569            } => {
570                let mut binary_expr = RawBinaryExpr {
571                    op: compare_op,
572                    left: self.sql_expr_to_logical_expr(
573                        *left,
574                        schema,
575                        planner_context,
576                    )?,
577                    right: self.sql_expr_to_logical_expr(
578                        *right,
579                        schema,
580                        planner_context,
581                    )?,
582                };
583                for planner in self.context_provider.get_expr_planners() {
584                    match planner.plan_any(binary_expr)? {
585                        PlannerResult::Planned(expr) => {
586                            return Ok(expr);
587                        }
588                        PlannerResult::Original(expr) => {
589                            binary_expr = expr;
590                        }
591                    }
592                }
593                not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
594            }
595            #[expect(deprecated)]
596            SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
597                qualifier: None,
598                options: Box::new(WildcardOptions::default()),
599            }),
600            #[expect(deprecated)]
601            SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard {
602                qualifier: Some(self.object_name_to_table_reference(object_name)?),
603                options: Box::new(WildcardOptions::default()),
604            }),
605            SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
606            _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
607        }
608    }
609
610    /// Parses a struct(..) expression and plans it creation
611    fn parse_struct(
612        &self,
613        schema: &DFSchema,
614        planner_context: &mut PlannerContext,
615        values: Vec<SQLExpr>,
616        fields: Vec<StructField>,
617    ) -> Result<Expr> {
618        if !fields.is_empty() {
619            return not_impl_err!("Struct fields are not supported yet");
620        }
621        let is_named_struct = values
622            .iter()
623            .any(|value| matches!(value, SQLExpr::Named { .. }));
624
625        let mut create_struct_args = if is_named_struct {
626            self.create_named_struct_expr(values, schema, planner_context)?
627        } else {
628            self.create_struct_expr(values, schema, planner_context)?
629        };
630
631        for planner in self.context_provider.get_expr_planners() {
632            match planner.plan_struct_literal(create_struct_args, is_named_struct)? {
633                PlannerResult::Planned(expr) => return Ok(expr),
634                PlannerResult::Original(args) => create_struct_args = args,
635            }
636        }
637        not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}")
638    }
639
640    fn parse_tuple(
641        &self,
642        schema: &DFSchema,
643        planner_context: &mut PlannerContext,
644        values: Vec<SQLExpr>,
645    ) -> Result<Expr> {
646        match values.first() {
647            Some(SQLExpr::Identifier(_))
648            | Some(SQLExpr::Value(_))
649            | Some(SQLExpr::CompoundIdentifier(_)) => {
650                self.parse_struct(schema, planner_context, values, vec![])
651            }
652            None => not_impl_err!("Empty tuple not supported yet"),
653            _ => {
654                not_impl_err!("Only identifiers and literals are supported in tuples")
655            }
656        }
657    }
658
659    fn sql_position_to_expr(
660        &self,
661        substr_expr: SQLExpr,
662        str_expr: SQLExpr,
663        schema: &DFSchema,
664        planner_context: &mut PlannerContext,
665    ) -> Result<Expr> {
666        let substr =
667            self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?;
668        let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?;
669        let mut position_args = vec![fullstr, substr];
670        for planner in self.context_provider.get_expr_planners() {
671            match planner.plan_position(position_args)? {
672                PlannerResult::Planned(expr) => return Ok(expr),
673                PlannerResult::Original(args) => {
674                    position_args = args;
675                }
676            }
677        }
678
679        not_impl_err!("Position not supported by ExprPlanner: {position_args:?}")
680    }
681
682    fn try_plan_dictionary_literal(
683        &self,
684        fields: Vec<DictionaryField>,
685        schema: &DFSchema,
686        planner_context: &mut PlannerContext,
687    ) -> Result<Expr> {
688        let mut keys = vec![];
689        let mut values = vec![];
690        for field in fields {
691            let key = lit(field.key.value);
692            let value =
693                self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?;
694            keys.push(key);
695            values.push(value);
696        }
697
698        let mut raw_expr = RawDictionaryExpr { keys, values };
699
700        for planner in self.context_provider.get_expr_planners() {
701            match planner.plan_dictionary_literal(raw_expr, schema)? {
702                PlannerResult::Planned(expr) => {
703                    return Ok(expr);
704                }
705                PlannerResult::Original(expr) => raw_expr = expr,
706            }
707        }
708        not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}")
709    }
710
711    fn try_plan_map_literal(
712        &self,
713        entries: Vec<MapEntry>,
714        schema: &DFSchema,
715        planner_context: &mut PlannerContext,
716    ) -> Result<Expr> {
717        let mut exprs: Vec<_> = entries
718            .into_iter()
719            .flat_map(|entry| vec![entry.key, entry.value].into_iter())
720            .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context))
721            .collect::<Result<Vec<_>>>()?;
722        for planner in self.context_provider.get_expr_planners() {
723            match planner.plan_make_map(exprs)? {
724                PlannerResult::Planned(expr) => {
725                    return Ok(expr);
726                }
727                PlannerResult::Original(expr) => exprs = expr,
728            }
729        }
730        not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}")
731    }
732
733    // Handles a call to struct(...) where the arguments are named. For example
734    // `struct (v as foo, v2 as bar)` by creating a call to the `named_struct` function
735    fn create_named_struct_expr(
736        &self,
737        values: Vec<SQLExpr>,
738        input_schema: &DFSchema,
739        planner_context: &mut PlannerContext,
740    ) -> Result<Vec<Expr>> {
741        Ok(values
742            .into_iter()
743            .enumerate()
744            .map(|(i, value)| {
745                let args = if let SQLExpr::Named { expr, name } = value {
746                    [
747                        name.value.lit(),
748                        self.sql_expr_to_logical_expr(
749                            *expr,
750                            input_schema,
751                            planner_context,
752                        )?,
753                    ]
754                } else {
755                    [
756                        format!("c{i}").lit(),
757                        self.sql_expr_to_logical_expr(
758                            value,
759                            input_schema,
760                            planner_context,
761                        )?,
762                    ]
763                };
764
765                Ok(args)
766            })
767            .collect::<Result<Vec<_>>>()?
768            .into_iter()
769            .flatten()
770            .collect())
771    }
772
773    // Handles a call to struct(...) where the arguments are not named. For example
774    // `struct (v, v2)` by creating a call to the `struct` function
775    // which will create a struct with fields named `c0`, `c1`, etc.
776    fn create_struct_expr(
777        &self,
778        values: Vec<SQLExpr>,
779        input_schema: &DFSchema,
780        planner_context: &mut PlannerContext,
781    ) -> Result<Vec<Expr>> {
782        values
783            .into_iter()
784            .map(|value| {
785                self.sql_expr_to_logical_expr(value, input_schema, planner_context)
786            })
787            .collect::<Result<Vec<_>>>()
788    }
789
790    fn sql_in_list_to_expr(
791        &self,
792        expr: SQLExpr,
793        list: Vec<SQLExpr>,
794        negated: bool,
795        schema: &DFSchema,
796        planner_context: &mut PlannerContext,
797    ) -> Result<Expr> {
798        let list_expr = list
799            .into_iter()
800            .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
801            .collect::<Result<Vec<_>>>()?;
802
803        Ok(Expr::InList(InList::new(
804            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
805            list_expr,
806            negated,
807        )))
808    }
809
810    #[allow(clippy::too_many_arguments)]
811    fn sql_like_to_expr(
812        &self,
813        negated: bool,
814        expr: SQLExpr,
815        pattern: SQLExpr,
816        escape_char: Option<String>,
817        schema: &DFSchema,
818        planner_context: &mut PlannerContext,
819        case_insensitive: bool,
820        any: bool,
821    ) -> Result<Expr> {
822        if any {
823            return not_impl_err!("ANY in LIKE expression");
824        }
825        let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
826        let escape_char = if let Some(char) = escape_char {
827            if char.len() != 1 {
828                return plan_err!("Invalid escape character in LIKE expression");
829            }
830            Some(char.chars().next().unwrap())
831        } else {
832            None
833        };
834        Ok(Expr::Like(Like::new(
835            negated,
836            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
837            Box::new(pattern),
838            escape_char,
839            case_insensitive,
840        )))
841    }
842
843    fn sql_similarto_to_expr(
844        &self,
845        negated: bool,
846        expr: SQLExpr,
847        pattern: SQLExpr,
848        escape_char: Option<String>,
849        schema: &DFSchema,
850        planner_context: &mut PlannerContext,
851    ) -> Result<Expr> {
852        let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
853        let pattern_type = pattern.get_type(schema)?;
854        if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
855            return plan_err!("Invalid pattern in SIMILAR TO expression");
856        }
857        let escape_char = if let Some(char) = escape_char {
858            if char.len() != 1 {
859                return plan_err!("Invalid escape character in SIMILAR TO expression");
860            }
861            Some(char.chars().next().unwrap())
862        } else {
863            None
864        };
865        Ok(Expr::SimilarTo(Like::new(
866            negated,
867            Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
868            Box::new(pattern),
869            escape_char,
870            false,
871        )))
872    }
873
874    fn sql_trim_to_expr(
875        &self,
876        expr: SQLExpr,
877        trim_where: Option<TrimWhereField>,
878        trim_what: Option<Box<SQLExpr>>,
879        trim_characters: Option<Vec<SQLExpr>>,
880        schema: &DFSchema,
881        planner_context: &mut PlannerContext,
882    ) -> Result<Expr> {
883        let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
884        let args = match (trim_what, trim_characters) {
885            (Some(to_trim), None) => {
886                let to_trim =
887                    self.sql_expr_to_logical_expr(*to_trim, schema, planner_context)?;
888                Ok(vec![arg, to_trim])
889            }
890            (None, Some(trim_characters)) => {
891                if let Some(first) = trim_characters.first() {
892                    let to_trim = self.sql_expr_to_logical_expr(
893                        first.clone(),
894                        schema,
895                        planner_context,
896                    )?;
897                    Ok(vec![arg, to_trim])
898                } else {
899                    plan_err!("TRIM CHARACTERS cannot be empty")
900                }
901            }
902            (Some(_), Some(_)) => {
903                plan_err!("Both TRIM and TRIM CHARACTERS cannot be specified")
904            }
905            (None, None) => Ok(vec![arg]),
906        }?;
907
908        let fun_name = match trim_where {
909            Some(TrimWhereField::Leading) => "ltrim",
910            Some(TrimWhereField::Trailing) => "rtrim",
911            Some(TrimWhereField::Both) => "btrim",
912            None => "trim",
913        };
914        let fun = self
915            .context_provider
916            .get_function_meta(fun_name)
917            .ok_or_else(|| {
918                internal_datafusion_err!("Unable to find expected '{fun_name}' function")
919            })?;
920
921        Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args)))
922    }
923
924    fn sql_overlay_to_expr(
925        &self,
926        expr: SQLExpr,
927        overlay_what: SQLExpr,
928        overlay_from: SQLExpr,
929        overlay_for: Option<Box<SQLExpr>>,
930        schema: &DFSchema,
931        planner_context: &mut PlannerContext,
932    ) -> Result<Expr> {
933        let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
934        let what_arg =
935            self.sql_expr_to_logical_expr(overlay_what, schema, planner_context)?;
936        let from_arg =
937            self.sql_expr_to_logical_expr(overlay_from, schema, planner_context)?;
938        let mut overlay_args = match overlay_for {
939            Some(for_expr) => {
940                let for_expr =
941                    self.sql_expr_to_logical_expr(*for_expr, schema, planner_context)?;
942                vec![arg, what_arg, from_arg, for_expr]
943            }
944            None => vec![arg, what_arg, from_arg],
945        };
946        for planner in self.context_provider.get_expr_planners() {
947            match planner.plan_overlay(overlay_args)? {
948                PlannerResult::Planned(expr) => return Ok(expr),
949                PlannerResult::Original(args) => overlay_args = args,
950            }
951        }
952        not_impl_err!("Overlay not supported by ExprPlanner: {overlay_args:?}")
953    }
954
955    fn sql_cast_to_expr(
956        &self,
957        expr: SQLExpr,
958        data_type: SQLDataType,
959        format: Option<CastFormat>,
960        schema: &DFSchema,
961        planner_context: &mut PlannerContext,
962    ) -> Result<Expr> {
963        if let Some(format) = format {
964            return not_impl_err!("CAST with format is not supported: {format}");
965        }
966
967        let dt = self.convert_data_type(&data_type)?;
968        let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
969
970        // numeric constants are treated as seconds (rather as nanoseconds)
971        // to align with postgres / duckdb semantics
972        let expr = match &dt {
973            DataType::Timestamp(TimeUnit::Nanosecond, tz)
974                if expr.get_type(schema)? == DataType::Int64 =>
975            {
976                Expr::Cast(Cast::new(
977                    Box::new(expr),
978                    DataType::Timestamp(TimeUnit::Second, tz.clone()),
979                ))
980            }
981            _ => expr,
982        };
983
984        Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
985    }
986
987    /// Extracts the root expression and access chain from a compound expression.
988    ///
989    /// This function attempts to identify if a compound expression (like `a.b.c`) should be treated
990    /// as a column reference with a qualifier (like `table.column`) or as a field access expression.
991    ///
992    /// # Arguments
993    ///
994    /// * `root` - The root SQL expression (e.g., the first part of `a.b.c`)
995    /// * `access_chain` - Vector of access expressions (e.g., `.b` and `.c` parts)
996    /// * `schema` - The schema to resolve column references against
997    /// * `planner_context` - Context for planning expressions
998    ///
999    /// # Returns
1000    ///
1001    /// A tuple containing:
1002    /// * The resolved root expression
1003    /// * The remaining access chain that should be processed as field accesses
1004    fn extract_root_and_access_chain(
1005        &self,
1006        root: SQLExpr,
1007        mut access_chain: Vec<AccessExpr>,
1008        schema: &DFSchema,
1009        planner_context: &mut PlannerContext,
1010    ) -> Result<(Expr, Vec<AccessExpr>)> {
1011        let SQLExpr::Identifier(root_ident) = root else {
1012            let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1013            return Ok((root, access_chain));
1014        };
1015
1016        let mut compound_idents = vec![root_ident];
1017        let first_non_ident = access_chain
1018            .iter()
1019            .position(|access| !matches!(access, AccessExpr::Dot(SQLExpr::Identifier(_))))
1020            .unwrap_or(access_chain.len());
1021        for access in access_chain.drain(0..first_non_ident) {
1022            if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
1023                compound_idents.push(ident);
1024            } else {
1025                return internal_err!("Expected identifier in access chain");
1026            }
1027        }
1028
1029        let root = if compound_idents.len() == 1 {
1030            self.sql_identifier_to_expr(
1031                compound_idents.pop().unwrap(),
1032                schema,
1033                planner_context,
1034            )?
1035        } else {
1036            self.sql_compound_identifier_to_expr(
1037                compound_idents,
1038                schema,
1039                planner_context,
1040            )?
1041        };
1042        Ok((root, access_chain))
1043    }
1044
1045    fn sql_compound_field_access_to_expr(
1046        &self,
1047        root: SQLExpr,
1048        access_chain: Vec<AccessExpr>,
1049        schema: &DFSchema,
1050        planner_context: &mut PlannerContext,
1051    ) -> Result<Expr> {
1052        let (root, access_chain) = self.extract_root_and_access_chain(
1053            root,
1054            access_chain,
1055            schema,
1056            planner_context,
1057        )?;
1058        let fields = access_chain
1059            .into_iter()
1060            .map(|field| match field {
1061                AccessExpr::Subscript(subscript) => {
1062                    match subscript {
1063                        Subscript::Index { index } => {
1064                            // index can be a name, in which case it is a named field access
1065                            match index {
1066                                SQLExpr::Value(ValueWithSpan {
1067                                    value:
1068                                        Value::SingleQuotedString(s)
1069                                        | Value::DoubleQuotedString(s),
1070                                    span: _,
1071                                }) => Ok(Some(GetFieldAccess::NamedStructField {
1072                                    name: ScalarValue::from(s),
1073                                })),
1074                                SQLExpr::JsonAccess { .. } => {
1075                                    not_impl_err!("JsonAccess")
1076                                }
1077                                // otherwise treat like a list index
1078                                _ => Ok(Some(GetFieldAccess::ListIndex {
1079                                    key: Box::new(self.sql_expr_to_logical_expr(
1080                                        index,
1081                                        schema,
1082                                        planner_context,
1083                                    )?),
1084                                })),
1085                            }
1086                        }
1087                        Subscript::Slice {
1088                            lower_bound,
1089                            upper_bound,
1090                            stride,
1091                        } => {
1092                            // Means access like [:2]
1093                            let lower_bound = if let Some(lower_bound) = lower_bound {
1094                                self.sql_expr_to_logical_expr(
1095                                    lower_bound,
1096                                    schema,
1097                                    planner_context,
1098                                )
1099                            } else {
1100                                not_impl_err!("Slice subscript requires a lower bound")
1101                            }?;
1102
1103                            // means access like [2:]
1104                            let upper_bound = if let Some(upper_bound) = upper_bound {
1105                                self.sql_expr_to_logical_expr(
1106                                    upper_bound,
1107                                    schema,
1108                                    planner_context,
1109                                )
1110                            } else {
1111                                not_impl_err!("Slice subscript requires an upper bound")
1112                            }?;
1113
1114                            // stride, default to 1
1115                            let stride = if let Some(stride) = stride {
1116                                self.sql_expr_to_logical_expr(
1117                                    stride,
1118                                    schema,
1119                                    planner_context,
1120                                )?
1121                            } else {
1122                                lit(1i64)
1123                            };
1124
1125                            Ok(Some(GetFieldAccess::ListRange {
1126                                start: Box::new(lower_bound),
1127                                stop: Box::new(upper_bound),
1128                                stride: Box::new(stride),
1129                            }))
1130                        }
1131                    }
1132                }
1133                AccessExpr::Dot(expr) => match expr {
1134                    SQLExpr::Value(ValueWithSpan {
1135                        value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
1136                        span    : _
1137                    }) => Ok(Some(GetFieldAccess::NamedStructField {
1138                        name: ScalarValue::from(s),
1139                    })),
1140                    _ => {
1141                        not_impl_err!(
1142                            "Dot access not supported for non-string expr: {expr:?}"
1143                        )
1144                    }
1145                },
1146            })
1147            .collect::<Result<Vec<_>>>()?;
1148
1149        fields
1150            .into_iter()
1151            .flatten()
1152            .try_fold(root, |expr, field_access| {
1153                let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
1154                for planner in self.context_provider.get_expr_planners() {
1155                    match planner.plan_field_access(field_access_expr, schema)? {
1156                        PlannerResult::Planned(expr) => return Ok(expr),
1157                        PlannerResult::Original(expr) => {
1158                            field_access_expr = expr;
1159                        }
1160                    }
1161                }
1162                not_impl_err!(
1163                    "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
1164                )
1165            })
1166    }
1167}
1168
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use arrow::datatypes::{Field, Schema};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use datafusion_common::config::ConfigOptions;
    use datafusion_common::TableReference;
    use datafusion_expr::logical_plan::builder::LogicalTableSource;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};

    use super::*;

    /// Minimal [`ContextProvider`] for the tests in this module.
    ///
    /// It knows a single table, `table1`, and a single aggregate function, `sum`;
    /// it provides no scalar functions, window functions or variables.
    struct TestContextProvider {
        options: ConfigOptions,
        tables: HashMap<String, Arc<dyn TableSource>>,
    }

    impl TestContextProvider {
        /// Creates a provider whose only table is `table1`, with one
        /// non-nullable `Utf8` column named `column1`.
        pub fn new() -> Self {
            let column1 = Field::new("column1".to_string(), DataType::Utf8, false);
            let tables = HashMap::from([(
                "table1".to_string(),
                create_table_source(vec![column1]),
            )]);

            Self {
                options: ConfigOptions::default(),
                tables,
            }
        }
    }

    impl ContextProvider for TestContextProvider {
        /// Looks the table up by its bare table name; unknown names are a plan error.
        fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
            if let Some(source) = self.tables.get(name.table()) {
                Ok(Arc::clone(source))
            } else {
                plan_err!("Table not found: {}", name.table())
            }
        }

        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        /// Only `sum` is available.
        fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
            (name == "sum").then(datafusion_functions_aggregate::sum::sum_udaf)
        }

        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }

        fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
            None
        }

        fn udf_names(&self) -> Vec<String> {
            Vec::new()
        }

        fn udaf_names(&self) -> Vec<String> {
            vec![String::from("sum")]
        }

        fn udwf_names(&self) -> Vec<String> {
            Vec::new()
        }
    }

    /// Wraps `fields` in a schema with empty metadata and exposes it as a
    /// [`TableSource`].
    fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
        let schema = Schema::new_with_metadata(fields, HashMap::new());
        Arc::new(LogicalTableSource::new(Arc::new(schema)))
    }

    /// Generates a test named `test_stack_overflow_<N>` which parses a chain of
    /// `N` comparisons `column1 = 'value<i>'` joined by `OR` and converts it to
    /// a logical expression against an empty schema, requiring success.
    macro_rules! test_stack_overflow {
        ($num_expr:expr) => {
            paste::item! {
                #[test]
                fn [<test_stack_overflow_ $num_expr>]() {
                    let context_provider = TestContextProvider::new();
                    let sql_to_rel = SqlToRel::new(&context_provider);

                    let comparisons = (0..$num_expr)
                        .map(|i| format!("column1 = 'value{:?}'", i))
                        .collect::<Vec<String>>();
                    let sql = comparisons.join(" OR ");

                    let dialect = GenericDialect {};
                    let mut parser = Parser::new(&dialect)
                        .try_with_sql(sql.as_str())
                        .unwrap();
                    let sql_expr = parser.parse_expr().unwrap();

                    let schema = DFSchema::empty();
                    let mut planner_context = PlannerContext::default();

                    // Should not stack overflow
                    let planned = sql_to_rel.sql_expr_to_logical_expr(
                        sql_expr,
                        &schema,
                        &mut planner_context,
                    );
                    planned.unwrap();
                }
            }
        };
    }

    test_stack_overflow!(64);
    test_stack_overflow!(128);
    test_stack_overflow!(256);
    test_stack_overflow!(512);
    test_stack_overflow!(1024);
    test_stack_overflow!(2048);
    test_stack_overflow!(4096);
    test_stack_overflow!(8192);

    /// An aliased SQL expression must be planned as an [`Expr::Alias`].
    #[test]
    fn test_sql_to_expr_with_alias() {
        let context_provider = TestContextProvider::new();
        let sql_to_rel = SqlToRel::new(&context_provider);

        let dialect = GenericDialect {};
        let sql = "SUM(int_col) as sum_int_col";
        let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
        // `parse_expr_with_alias` comes from sqlparser
        let aliased_sql_expr = parser.parse_expr_with_alias().unwrap();

        let schema = DFSchema::empty();
        let mut planner_context = PlannerContext::default();
        let planned = sql_to_rel
            .sql_expr_to_logical_expr_with_alias(
                aliased_sql_expr,
                &schema,
                &mut planner_context,
            )
            .unwrap();

        assert!(matches!(planned, Expr::Alias(_)));
    }
}