1use arrow::datatypes::{DataType, TimeUnit};
19use datafusion_expr::planner::{
20 PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
21};
22use sqlparser::ast::{
23 AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType,
24 DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry,
25 StructField, Subscript, TrimWhereField, Value, ValueWithSpan,
26};
27
28use datafusion_common::{
29 internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
30 ScalarValue,
31};
32
33use datafusion_expr::expr::ScalarFunction;
34use datafusion_expr::expr::{InList, WildcardOptions};
35use datafusion_expr::{
36 lit, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal,
37 Operator, TryCast,
38};
39
40use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
41
42mod binary_op;
43mod function;
44mod grouping_set;
45mod identifier;
46mod order_by;
47mod subquery;
48mod substring;
49mod unary_op;
50mod value;
51
52impl<S: ContextProvider> SqlToRel<'_, S> {
53 pub(crate) fn sql_expr_to_logical_expr_with_alias(
54 &self,
55 sql: SQLExprWithAlias,
56 schema: &DFSchema,
57 planner_context: &mut PlannerContext,
58 ) -> Result<Expr> {
59 let mut expr =
60 self.sql_expr_to_logical_expr(sql.expr, schema, planner_context)?;
61 if let Some(alias) = sql.alias {
62 expr = expr.alias(alias.value);
63 }
64 Ok(expr)
65 }
66 pub(crate) fn sql_expr_to_logical_expr(
67 &self,
68 sql: SQLExpr,
69 schema: &DFSchema,
70 planner_context: &mut PlannerContext,
71 ) -> Result<Expr> {
72 enum StackEntry {
73 SQLExpr(Box<SQLExpr>),
74 Operator(BinaryOperator),
75 }
76
77 let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
82 let mut eval_stack = vec![];
83
84 while let Some(entry) = stack.pop() {
85 match entry {
86 StackEntry::SQLExpr(sql_expr) => {
87 match *sql_expr {
88 SQLExpr::BinaryOp { left, op, right } => {
89 stack.push(StackEntry::Operator(op));
92 stack.push(StackEntry::SQLExpr(right));
93 stack.push(StackEntry::SQLExpr(left));
94 }
95 _ => {
96 let expr = self.sql_expr_to_logical_expr_internal(
97 *sql_expr,
98 schema,
99 planner_context,
100 )?;
101 eval_stack.push(expr);
102 }
103 }
104 }
105 StackEntry::Operator(op) => {
106 let right = eval_stack.pop().unwrap();
107 let left = eval_stack.pop().unwrap();
108 let expr = self.build_logical_expr(op, left, right, schema)?;
109 eval_stack.push(expr);
110 }
111 }
112 }
113
114 assert_eq!(1, eval_stack.len());
115 let expr = eval_stack.pop().unwrap();
116 Ok(expr)
117 }
118
119 fn build_logical_expr(
120 &self,
121 op: BinaryOperator,
122 left: Expr,
123 right: Expr,
124 schema: &DFSchema,
125 ) -> Result<Expr> {
126 let mut binary_expr = RawBinaryExpr { op, left, right };
128 for planner in self.context_provider.get_expr_planners() {
129 match planner.plan_binary_op(binary_expr, schema)? {
130 PlannerResult::Planned(expr) => {
131 return Ok(expr);
132 }
133 PlannerResult::Original(expr) => {
134 binary_expr = expr;
135 }
136 }
137 }
138
139 let RawBinaryExpr { op, left, right } = binary_expr;
140 Ok(Expr::BinaryExpr(BinaryExpr::new(
141 Box::new(left),
142 self.parse_sql_binary_op(op)?,
143 Box::new(right),
144 )))
145 }
146
147 pub fn sql_to_expr_with_alias(
148 &self,
149 sql: SQLExprWithAlias,
150 schema: &DFSchema,
151 planner_context: &mut PlannerContext,
152 ) -> Result<Expr> {
153 let mut expr =
154 self.sql_expr_to_logical_expr_with_alias(sql, schema, planner_context)?;
155 expr = self.rewrite_partial_qualifier(expr, schema);
156 self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
157 let (expr, _) = expr.infer_placeholder_types(schema)?;
158 Ok(expr)
159 }
160
161 pub fn sql_to_expr(
163 &self,
164 sql: SQLExpr,
165 schema: &DFSchema,
166 planner_context: &mut PlannerContext,
167 ) -> Result<Expr> {
168 let mut expr = self.sql_expr_to_logical_expr(sql, schema, planner_context)?;
170 expr = self.rewrite_partial_qualifier(expr, schema);
171 self.validate_schema_satisfies_exprs(schema, std::slice::from_ref(&expr))?;
172 let (expr, _) = expr.infer_placeholder_types(schema)?;
173 Ok(expr)
174 }
175
176 fn rewrite_partial_qualifier(&self, expr: Expr, schema: &DFSchema) -> Expr {
178 match expr {
179 Expr::Column(col) => match &col.relation {
180 Some(q) => {
181 match schema.iter().find(|(qualifier, field)| match qualifier {
182 Some(field_q) => {
183 field.name() == &col.name
184 && field_q.to_string().ends_with(&format!(".{q}"))
185 }
186 _ => false,
187 }) {
188 Some((qualifier, df_field)) => Expr::from((qualifier, df_field)),
189 None => Expr::Column(col),
190 }
191 }
192 None => Expr::Column(col),
193 },
194 _ => expr,
195 }
196 }
197
198 #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
201 fn sql_expr_to_logical_expr_internal(
202 &self,
203 sql: SQLExpr,
204 schema: &DFSchema,
205 planner_context: &mut PlannerContext,
206 ) -> Result<Expr> {
207 match sql {
213 SQLExpr::Value(value) => {
214 self.parse_value(value.into(), planner_context.prepare_param_data_types())
215 }
216 SQLExpr::Extract { field, expr, .. } => {
217 let mut extract_args = vec![
218 Expr::Literal(ScalarValue::from(format!("{field}")), None),
219 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
220 ];
221
222 for planner in self.context_provider.get_expr_planners() {
223 match planner.plan_extract(extract_args)? {
224 PlannerResult::Planned(expr) => return Ok(expr),
225 PlannerResult::Original(args) => {
226 extract_args = args;
227 }
228 }
229 }
230
231 not_impl_err!("Extract not supported by ExprPlanner: {extract_args:?}")
232 }
233
234 SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema),
235 SQLExpr::Interval(interval) => self.sql_interval_to_expr(false, interval),
236 SQLExpr::Identifier(id) => {
237 self.sql_identifier_to_expr(id, schema, planner_context)
238 }
239
240 SQLExpr::CompoundFieldAccess { root, access_chain } => self
242 .sql_compound_field_access_to_expr(
243 *root,
244 access_chain,
245 schema,
246 planner_context,
247 ),
248
249 SQLExpr::CompoundIdentifier(ids) => {
250 self.sql_compound_identifier_to_expr(ids, schema, planner_context)
251 }
252
253 SQLExpr::Case {
254 operand,
255 conditions,
256 else_result,
257 } => self.sql_case_identifier_to_expr(
258 operand,
259 conditions,
260 else_result,
261 schema,
262 planner_context,
263 ),
264
265 SQLExpr::Cast {
266 kind: CastKind::Cast | CastKind::DoubleColon,
267 expr,
268 data_type,
269 format,
270 } => self.sql_cast_to_expr(*expr, data_type, format, schema, planner_context),
271
272 SQLExpr::Cast {
273 kind: CastKind::TryCast | CastKind::SafeCast,
274 expr,
275 data_type,
276 format,
277 } => {
278 if let Some(format) = format {
279 return not_impl_err!("CAST with format is not supported: {format}");
280 }
281
282 Ok(Expr::TryCast(TryCast::new(
283 Box::new(self.sql_expr_to_logical_expr(
284 *expr,
285 schema,
286 planner_context,
287 )?),
288 self.convert_data_type(&data_type)?,
289 )))
290 }
291
292 SQLExpr::TypedString { data_type, value } => Ok(Expr::Cast(Cast::new(
293 Box::new(lit(value.into_string().unwrap())),
294 self.convert_data_type(&data_type)?,
295 ))),
296
297 SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
298 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
299 ))),
300
301 SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new(
302 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
303 ))),
304
305 SQLExpr::IsDistinctFrom(left, right) => {
306 Ok(Expr::BinaryExpr(BinaryExpr::new(
307 Box::new(self.sql_expr_to_logical_expr(
308 *left,
309 schema,
310 planner_context,
311 )?),
312 Operator::IsDistinctFrom,
313 Box::new(self.sql_expr_to_logical_expr(
314 *right,
315 schema,
316 planner_context,
317 )?),
318 )))
319 }
320
321 SQLExpr::IsNotDistinctFrom(left, right) => {
322 Ok(Expr::BinaryExpr(BinaryExpr::new(
323 Box::new(self.sql_expr_to_logical_expr(
324 *left,
325 schema,
326 planner_context,
327 )?),
328 Operator::IsNotDistinctFrom,
329 Box::new(self.sql_expr_to_logical_expr(
330 *right,
331 schema,
332 planner_context,
333 )?),
334 )))
335 }
336
337 SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new(
338 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
339 ))),
340
341 SQLExpr::IsFalse(expr) => Ok(Expr::IsFalse(Box::new(
342 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
343 ))),
344
345 SQLExpr::IsNotTrue(expr) => Ok(Expr::IsNotTrue(Box::new(
346 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
347 ))),
348
349 SQLExpr::IsNotFalse(expr) => Ok(Expr::IsNotFalse(Box::new(
350 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
351 ))),
352
353 SQLExpr::IsUnknown(expr) => Ok(Expr::IsUnknown(Box::new(
354 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
355 ))),
356
357 SQLExpr::IsNotUnknown(expr) => Ok(Expr::IsNotUnknown(Box::new(
358 self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
359 ))),
360
361 SQLExpr::UnaryOp { op, expr } => {
362 self.parse_sql_unary_op(op, *expr, schema, planner_context)
363 }
364
365 SQLExpr::Between {
366 expr,
367 negated,
368 low,
369 high,
370 } => Ok(Expr::Between(Between::new(
371 Box::new(self.sql_expr_to_logical_expr(
372 *expr,
373 schema,
374 planner_context,
375 )?),
376 negated,
377 Box::new(self.sql_expr_to_logical_expr(*low, schema, planner_context)?),
378 Box::new(self.sql_expr_to_logical_expr(
379 *high,
380 schema,
381 planner_context,
382 )?),
383 ))),
384
385 SQLExpr::InList {
386 expr,
387 list,
388 negated,
389 } => self.sql_in_list_to_expr(*expr, list, negated, schema, planner_context),
390
391 SQLExpr::Like {
392 negated,
393 expr,
394 pattern,
395 escape_char,
396 any,
397 } => self.sql_like_to_expr(
398 negated,
399 *expr,
400 *pattern,
401 escape_char,
402 schema,
403 planner_context,
404 false,
405 any,
406 ),
407
408 SQLExpr::ILike {
409 negated,
410 expr,
411 pattern,
412 escape_char,
413 any,
414 } => self.sql_like_to_expr(
415 negated,
416 *expr,
417 *pattern,
418 escape_char,
419 schema,
420 planner_context,
421 true,
422 any,
423 ),
424
425 SQLExpr::SimilarTo {
426 negated,
427 expr,
428 pattern,
429 escape_char,
430 } => self.sql_similarto_to_expr(
431 negated,
432 *expr,
433 *pattern,
434 escape_char,
435 schema,
436 planner_context,
437 ),
438
439 SQLExpr::BinaryOp { .. } => {
440 internal_err!("binary_op should be handled by sql_expr_to_logical_expr.")
441 }
442
443 #[cfg(feature = "unicode_expressions")]
444 SQLExpr::Substring {
445 expr,
446 substring_from,
447 substring_for,
448 special: _,
449 } => self.sql_substring_to_expr(
450 expr,
451 substring_from,
452 substring_for,
453 schema,
454 planner_context,
455 ),
456
457 #[cfg(not(feature = "unicode_expressions"))]
458 SQLExpr::Substring { .. } => {
459 internal_err!(
460 "statement substring requires compilation with feature flag: unicode_expressions."
461 )
462 }
463
464 SQLExpr::Trim {
465 expr,
466 trim_where,
467 trim_what,
468 trim_characters,
469 } => self.sql_trim_to_expr(
470 *expr,
471 trim_where,
472 trim_what,
473 trim_characters,
474 schema,
475 planner_context,
476 ),
477
478 SQLExpr::Function(function) => {
479 self.sql_function_to_expr(function, schema, planner_context)
480 }
481
482 SQLExpr::Rollup(exprs) => {
483 self.sql_rollup_to_expr(exprs, schema, planner_context)
484 }
485 SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
486 SQLExpr::GroupingSets(exprs) => {
487 self.sql_grouping_sets_to_expr(exprs, schema, planner_context)
488 }
489
490 SQLExpr::Floor {
491 expr,
492 field: _field,
493 } => self.sql_fn_name_to_expr(*expr, "floor", schema, planner_context),
494 SQLExpr::Ceil {
495 expr,
496 field: _field,
497 } => self.sql_fn_name_to_expr(*expr, "ceil", schema, planner_context),
498 SQLExpr::Overlay {
499 expr,
500 overlay_what,
501 overlay_from,
502 overlay_for,
503 } => self.sql_overlay_to_expr(
504 *expr,
505 *overlay_what,
506 *overlay_from,
507 overlay_for,
508 schema,
509 planner_context,
510 ),
511 SQLExpr::Nested(e) => {
512 self.sql_expr_to_logical_expr(*e, schema, planner_context)
513 }
514
515 SQLExpr::Exists { subquery, negated } => {
516 self.parse_exists_subquery(*subquery, negated, schema, planner_context)
517 }
518 SQLExpr::InSubquery {
519 expr,
520 subquery,
521 negated,
522 } => {
523 self.parse_in_subquery(*expr, *subquery, negated, schema, planner_context)
524 }
525 SQLExpr::Subquery(subquery) => {
526 self.parse_scalar_subquery(*subquery, schema, planner_context)
527 }
528
529 SQLExpr::Struct { values, fields } => {
530 self.parse_struct(schema, planner_context, values, fields)
531 }
532 SQLExpr::Position { expr, r#in } => {
533 self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
534 }
535 SQLExpr::AtTimeZone {
536 timestamp,
537 time_zone,
538 } => Ok(Expr::Cast(Cast::new(
539 Box::new(self.sql_expr_to_logical_expr_internal(
540 *timestamp,
541 schema,
542 planner_context,
543 )?),
544 match *time_zone {
545 SQLExpr::Value(ValueWithSpan {
546 value: Value::SingleQuotedString(s),
547 span: _,
548 }) => DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())),
549 _ => {
550 return not_impl_err!(
551 "Unsupported ast node in sqltorel: {time_zone:?}"
552 )
553 }
554 },
555 ))),
556 SQLExpr::Dictionary(fields) => {
557 self.try_plan_dictionary_literal(fields, schema, planner_context)
558 }
559 SQLExpr::Map(map) => {
560 self.try_plan_map_literal(map.entries, schema, planner_context)
561 }
562 SQLExpr::AnyOp {
563 left,
564 compare_op,
565 right,
566 is_some: _,
569 } => {
570 let mut binary_expr = RawBinaryExpr {
571 op: compare_op,
572 left: self.sql_expr_to_logical_expr(
573 *left,
574 schema,
575 planner_context,
576 )?,
577 right: self.sql_expr_to_logical_expr(
578 *right,
579 schema,
580 planner_context,
581 )?,
582 };
583 for planner in self.context_provider.get_expr_planners() {
584 match planner.plan_any(binary_expr)? {
585 PlannerResult::Planned(expr) => {
586 return Ok(expr);
587 }
588 PlannerResult::Original(expr) => {
589 binary_expr = expr;
590 }
591 }
592 }
593 not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
594 }
595 #[expect(deprecated)]
596 SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
597 qualifier: None,
598 options: Box::new(WildcardOptions::default()),
599 }),
600 #[expect(deprecated)]
601 SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard {
602 qualifier: Some(self.object_name_to_table_reference(object_name)?),
603 options: Box::new(WildcardOptions::default()),
604 }),
605 SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
606 _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
607 }
608 }
609
610 fn parse_struct(
612 &self,
613 schema: &DFSchema,
614 planner_context: &mut PlannerContext,
615 values: Vec<SQLExpr>,
616 fields: Vec<StructField>,
617 ) -> Result<Expr> {
618 if !fields.is_empty() {
619 return not_impl_err!("Struct fields are not supported yet");
620 }
621 let is_named_struct = values
622 .iter()
623 .any(|value| matches!(value, SQLExpr::Named { .. }));
624
625 let mut create_struct_args = if is_named_struct {
626 self.create_named_struct_expr(values, schema, planner_context)?
627 } else {
628 self.create_struct_expr(values, schema, planner_context)?
629 };
630
631 for planner in self.context_provider.get_expr_planners() {
632 match planner.plan_struct_literal(create_struct_args, is_named_struct)? {
633 PlannerResult::Planned(expr) => return Ok(expr),
634 PlannerResult::Original(args) => create_struct_args = args,
635 }
636 }
637 not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}")
638 }
639
640 fn parse_tuple(
641 &self,
642 schema: &DFSchema,
643 planner_context: &mut PlannerContext,
644 values: Vec<SQLExpr>,
645 ) -> Result<Expr> {
646 match values.first() {
647 Some(SQLExpr::Identifier(_))
648 | Some(SQLExpr::Value(_))
649 | Some(SQLExpr::CompoundIdentifier(_)) => {
650 self.parse_struct(schema, planner_context, values, vec![])
651 }
652 None => not_impl_err!("Empty tuple not supported yet"),
653 _ => {
654 not_impl_err!("Only identifiers and literals are supported in tuples")
655 }
656 }
657 }
658
659 fn sql_position_to_expr(
660 &self,
661 substr_expr: SQLExpr,
662 str_expr: SQLExpr,
663 schema: &DFSchema,
664 planner_context: &mut PlannerContext,
665 ) -> Result<Expr> {
666 let substr =
667 self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?;
668 let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?;
669 let mut position_args = vec![fullstr, substr];
670 for planner in self.context_provider.get_expr_planners() {
671 match planner.plan_position(position_args)? {
672 PlannerResult::Planned(expr) => return Ok(expr),
673 PlannerResult::Original(args) => {
674 position_args = args;
675 }
676 }
677 }
678
679 not_impl_err!("Position not supported by ExprPlanner: {position_args:?}")
680 }
681
682 fn try_plan_dictionary_literal(
683 &self,
684 fields: Vec<DictionaryField>,
685 schema: &DFSchema,
686 planner_context: &mut PlannerContext,
687 ) -> Result<Expr> {
688 let mut keys = vec![];
689 let mut values = vec![];
690 for field in fields {
691 let key = lit(field.key.value);
692 let value =
693 self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?;
694 keys.push(key);
695 values.push(value);
696 }
697
698 let mut raw_expr = RawDictionaryExpr { keys, values };
699
700 for planner in self.context_provider.get_expr_planners() {
701 match planner.plan_dictionary_literal(raw_expr, schema)? {
702 PlannerResult::Planned(expr) => {
703 return Ok(expr);
704 }
705 PlannerResult::Original(expr) => raw_expr = expr,
706 }
707 }
708 not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}")
709 }
710
711 fn try_plan_map_literal(
712 &self,
713 entries: Vec<MapEntry>,
714 schema: &DFSchema,
715 planner_context: &mut PlannerContext,
716 ) -> Result<Expr> {
717 let mut exprs: Vec<_> = entries
718 .into_iter()
719 .flat_map(|entry| vec![entry.key, entry.value].into_iter())
720 .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context))
721 .collect::<Result<Vec<_>>>()?;
722 for planner in self.context_provider.get_expr_planners() {
723 match planner.plan_make_map(exprs)? {
724 PlannerResult::Planned(expr) => {
725 return Ok(expr);
726 }
727 PlannerResult::Original(expr) => exprs = expr,
728 }
729 }
730 not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}")
731 }
732
733 fn create_named_struct_expr(
736 &self,
737 values: Vec<SQLExpr>,
738 input_schema: &DFSchema,
739 planner_context: &mut PlannerContext,
740 ) -> Result<Vec<Expr>> {
741 Ok(values
742 .into_iter()
743 .enumerate()
744 .map(|(i, value)| {
745 let args = if let SQLExpr::Named { expr, name } = value {
746 [
747 name.value.lit(),
748 self.sql_expr_to_logical_expr(
749 *expr,
750 input_schema,
751 planner_context,
752 )?,
753 ]
754 } else {
755 [
756 format!("c{i}").lit(),
757 self.sql_expr_to_logical_expr(
758 value,
759 input_schema,
760 planner_context,
761 )?,
762 ]
763 };
764
765 Ok(args)
766 })
767 .collect::<Result<Vec<_>>>()?
768 .into_iter()
769 .flatten()
770 .collect())
771 }
772
773 fn create_struct_expr(
777 &self,
778 values: Vec<SQLExpr>,
779 input_schema: &DFSchema,
780 planner_context: &mut PlannerContext,
781 ) -> Result<Vec<Expr>> {
782 values
783 .into_iter()
784 .map(|value| {
785 self.sql_expr_to_logical_expr(value, input_schema, planner_context)
786 })
787 .collect::<Result<Vec<_>>>()
788 }
789
790 fn sql_in_list_to_expr(
791 &self,
792 expr: SQLExpr,
793 list: Vec<SQLExpr>,
794 negated: bool,
795 schema: &DFSchema,
796 planner_context: &mut PlannerContext,
797 ) -> Result<Expr> {
798 let list_expr = list
799 .into_iter()
800 .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
801 .collect::<Result<Vec<_>>>()?;
802
803 Ok(Expr::InList(InList::new(
804 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
805 list_expr,
806 negated,
807 )))
808 }
809
810 #[allow(clippy::too_many_arguments)]
811 fn sql_like_to_expr(
812 &self,
813 negated: bool,
814 expr: SQLExpr,
815 pattern: SQLExpr,
816 escape_char: Option<String>,
817 schema: &DFSchema,
818 planner_context: &mut PlannerContext,
819 case_insensitive: bool,
820 any: bool,
821 ) -> Result<Expr> {
822 if any {
823 return not_impl_err!("ANY in LIKE expression");
824 }
825 let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
826 let escape_char = if let Some(char) = escape_char {
827 if char.len() != 1 {
828 return plan_err!("Invalid escape character in LIKE expression");
829 }
830 Some(char.chars().next().unwrap())
831 } else {
832 None
833 };
834 Ok(Expr::Like(Like::new(
835 negated,
836 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
837 Box::new(pattern),
838 escape_char,
839 case_insensitive,
840 )))
841 }
842
843 fn sql_similarto_to_expr(
844 &self,
845 negated: bool,
846 expr: SQLExpr,
847 pattern: SQLExpr,
848 escape_char: Option<String>,
849 schema: &DFSchema,
850 planner_context: &mut PlannerContext,
851 ) -> Result<Expr> {
852 let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
853 let pattern_type = pattern.get_type(schema)?;
854 if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
855 return plan_err!("Invalid pattern in SIMILAR TO expression");
856 }
857 let escape_char = if let Some(char) = escape_char {
858 if char.len() != 1 {
859 return plan_err!("Invalid escape character in SIMILAR TO expression");
860 }
861 Some(char.chars().next().unwrap())
862 } else {
863 None
864 };
865 Ok(Expr::SimilarTo(Like::new(
866 negated,
867 Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?),
868 Box::new(pattern),
869 escape_char,
870 false,
871 )))
872 }
873
874 fn sql_trim_to_expr(
875 &self,
876 expr: SQLExpr,
877 trim_where: Option<TrimWhereField>,
878 trim_what: Option<Box<SQLExpr>>,
879 trim_characters: Option<Vec<SQLExpr>>,
880 schema: &DFSchema,
881 planner_context: &mut PlannerContext,
882 ) -> Result<Expr> {
883 let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
884 let args = match (trim_what, trim_characters) {
885 (Some(to_trim), None) => {
886 let to_trim =
887 self.sql_expr_to_logical_expr(*to_trim, schema, planner_context)?;
888 Ok(vec![arg, to_trim])
889 }
890 (None, Some(trim_characters)) => {
891 if let Some(first) = trim_characters.first() {
892 let to_trim = self.sql_expr_to_logical_expr(
893 first.clone(),
894 schema,
895 planner_context,
896 )?;
897 Ok(vec![arg, to_trim])
898 } else {
899 plan_err!("TRIM CHARACTERS cannot be empty")
900 }
901 }
902 (Some(_), Some(_)) => {
903 plan_err!("Both TRIM and TRIM CHARACTERS cannot be specified")
904 }
905 (None, None) => Ok(vec![arg]),
906 }?;
907
908 let fun_name = match trim_where {
909 Some(TrimWhereField::Leading) => "ltrim",
910 Some(TrimWhereField::Trailing) => "rtrim",
911 Some(TrimWhereField::Both) => "btrim",
912 None => "trim",
913 };
914 let fun = self
915 .context_provider
916 .get_function_meta(fun_name)
917 .ok_or_else(|| {
918 internal_datafusion_err!("Unable to find expected '{fun_name}' function")
919 })?;
920
921 Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args)))
922 }
923
924 fn sql_overlay_to_expr(
925 &self,
926 expr: SQLExpr,
927 overlay_what: SQLExpr,
928 overlay_from: SQLExpr,
929 overlay_for: Option<Box<SQLExpr>>,
930 schema: &DFSchema,
931 planner_context: &mut PlannerContext,
932 ) -> Result<Expr> {
933 let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
934 let what_arg =
935 self.sql_expr_to_logical_expr(overlay_what, schema, planner_context)?;
936 let from_arg =
937 self.sql_expr_to_logical_expr(overlay_from, schema, planner_context)?;
938 let mut overlay_args = match overlay_for {
939 Some(for_expr) => {
940 let for_expr =
941 self.sql_expr_to_logical_expr(*for_expr, schema, planner_context)?;
942 vec![arg, what_arg, from_arg, for_expr]
943 }
944 None => vec![arg, what_arg, from_arg],
945 };
946 for planner in self.context_provider.get_expr_planners() {
947 match planner.plan_overlay(overlay_args)? {
948 PlannerResult::Planned(expr) => return Ok(expr),
949 PlannerResult::Original(args) => overlay_args = args,
950 }
951 }
952 not_impl_err!("Overlay not supported by ExprPlanner: {overlay_args:?}")
953 }
954
955 fn sql_cast_to_expr(
956 &self,
957 expr: SQLExpr,
958 data_type: SQLDataType,
959 format: Option<CastFormat>,
960 schema: &DFSchema,
961 planner_context: &mut PlannerContext,
962 ) -> Result<Expr> {
963 if let Some(format) = format {
964 return not_impl_err!("CAST with format is not supported: {format}");
965 }
966
967 let dt = self.convert_data_type(&data_type)?;
968 let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
969
970 let expr = match &dt {
973 DataType::Timestamp(TimeUnit::Nanosecond, tz)
974 if expr.get_type(schema)? == DataType::Int64 =>
975 {
976 Expr::Cast(Cast::new(
977 Box::new(expr),
978 DataType::Timestamp(TimeUnit::Second, tz.clone()),
979 ))
980 }
981 _ => expr,
982 };
983
984 Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
985 }
986
987 fn extract_root_and_access_chain(
1005 &self,
1006 root: SQLExpr,
1007 mut access_chain: Vec<AccessExpr>,
1008 schema: &DFSchema,
1009 planner_context: &mut PlannerContext,
1010 ) -> Result<(Expr, Vec<AccessExpr>)> {
1011 let SQLExpr::Identifier(root_ident) = root else {
1012 let root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
1013 return Ok((root, access_chain));
1014 };
1015
1016 let mut compound_idents = vec![root_ident];
1017 let first_non_ident = access_chain
1018 .iter()
1019 .position(|access| !matches!(access, AccessExpr::Dot(SQLExpr::Identifier(_))))
1020 .unwrap_or(access_chain.len());
1021 for access in access_chain.drain(0..first_non_ident) {
1022 if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
1023 compound_idents.push(ident);
1024 } else {
1025 return internal_err!("Expected identifier in access chain");
1026 }
1027 }
1028
1029 let root = if compound_idents.len() == 1 {
1030 self.sql_identifier_to_expr(
1031 compound_idents.pop().unwrap(),
1032 schema,
1033 planner_context,
1034 )?
1035 } else {
1036 self.sql_compound_identifier_to_expr(
1037 compound_idents,
1038 schema,
1039 planner_context,
1040 )?
1041 };
1042 Ok((root, access_chain))
1043 }
1044
1045 fn sql_compound_field_access_to_expr(
1046 &self,
1047 root: SQLExpr,
1048 access_chain: Vec<AccessExpr>,
1049 schema: &DFSchema,
1050 planner_context: &mut PlannerContext,
1051 ) -> Result<Expr> {
1052 let (root, access_chain) = self.extract_root_and_access_chain(
1053 root,
1054 access_chain,
1055 schema,
1056 planner_context,
1057 )?;
1058 let fields = access_chain
1059 .into_iter()
1060 .map(|field| match field {
1061 AccessExpr::Subscript(subscript) => {
1062 match subscript {
1063 Subscript::Index { index } => {
1064 match index {
1066 SQLExpr::Value(ValueWithSpan {
1067 value:
1068 Value::SingleQuotedString(s)
1069 | Value::DoubleQuotedString(s),
1070 span: _,
1071 }) => Ok(Some(GetFieldAccess::NamedStructField {
1072 name: ScalarValue::from(s),
1073 })),
1074 SQLExpr::JsonAccess { .. } => {
1075 not_impl_err!("JsonAccess")
1076 }
1077 _ => Ok(Some(GetFieldAccess::ListIndex {
1079 key: Box::new(self.sql_expr_to_logical_expr(
1080 index,
1081 schema,
1082 planner_context,
1083 )?),
1084 })),
1085 }
1086 }
1087 Subscript::Slice {
1088 lower_bound,
1089 upper_bound,
1090 stride,
1091 } => {
1092 let lower_bound = if let Some(lower_bound) = lower_bound {
1094 self.sql_expr_to_logical_expr(
1095 lower_bound,
1096 schema,
1097 planner_context,
1098 )
1099 } else {
1100 not_impl_err!("Slice subscript requires a lower bound")
1101 }?;
1102
1103 let upper_bound = if let Some(upper_bound) = upper_bound {
1105 self.sql_expr_to_logical_expr(
1106 upper_bound,
1107 schema,
1108 planner_context,
1109 )
1110 } else {
1111 not_impl_err!("Slice subscript requires an upper bound")
1112 }?;
1113
1114 let stride = if let Some(stride) = stride {
1116 self.sql_expr_to_logical_expr(
1117 stride,
1118 schema,
1119 planner_context,
1120 )?
1121 } else {
1122 lit(1i64)
1123 };
1124
1125 Ok(Some(GetFieldAccess::ListRange {
1126 start: Box::new(lower_bound),
1127 stop: Box::new(upper_bound),
1128 stride: Box::new(stride),
1129 }))
1130 }
1131 }
1132 }
1133 AccessExpr::Dot(expr) => match expr {
1134 SQLExpr::Value(ValueWithSpan {
1135 value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
1136 span : _
1137 }) => Ok(Some(GetFieldAccess::NamedStructField {
1138 name: ScalarValue::from(s),
1139 })),
1140 _ => {
1141 not_impl_err!(
1142 "Dot access not supported for non-string expr: {expr:?}"
1143 )
1144 }
1145 },
1146 })
1147 .collect::<Result<Vec<_>>>()?;
1148
1149 fields
1150 .into_iter()
1151 .flatten()
1152 .try_fold(root, |expr, field_access| {
1153 let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
1154 for planner in self.context_provider.get_expr_planners() {
1155 match planner.plan_field_access(field_access_expr, schema)? {
1156 PlannerResult::Planned(expr) => return Ok(expr),
1157 PlannerResult::Original(expr) => {
1158 field_access_expr = expr;
1159 }
1160 }
1161 }
1162 not_impl_err!(
1163 "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
1164 )
1165 })
1166 }
1167}
1168
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use arrow::datatypes::{Field, Schema};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use datafusion_common::config::ConfigOptions;
    use datafusion_common::TableReference;
    use datafusion_expr::logical_plan::builder::LogicalTableSource;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};

    use super::*;

    /// Minimal [`ContextProvider`] for these tests: one table (`table1` with a
    /// single `Utf8` column `column1`) and one aggregate function (`sum`).
    struct TestContextProvider {
        options: ConfigOptions,
        tables: HashMap<String, Arc<dyn TableSource>>,
    }

    impl TestContextProvider {
        pub fn new() -> Self {
            let column1 = Field::new("column1".to_string(), DataType::Utf8, false);
            let tables =
                HashMap::from([("table1".to_string(), create_table_source(vec![column1]))]);

            Self {
                options: ConfigOptions::default(),
                tables,
            }
        }
    }

    impl ContextProvider for TestContextProvider {
        fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
            if let Some(table) = self.tables.get(name.table()) {
                Ok(Arc::clone(table))
            } else {
                plan_err!("Table not found: {}", name.table())
            }
        }

        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
            (name == "sum").then(datafusion_functions_aggregate::sum::sum_udaf)
        }

        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }

        fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
            None
        }

        fn udf_names(&self) -> Vec<String> {
            vec![]
        }

        fn udaf_names(&self) -> Vec<String> {
            vec![String::from("sum")]
        }

        fn udwf_names(&self) -> Vec<String> {
            vec![]
        }
    }

    /// Wraps `fields` in a schema with empty metadata and exposes it as a table source.
    fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
        let schema = Schema::new_with_metadata(fields, HashMap::new());
        Arc::new(LogicalTableSource::new(Arc::new(schema)))
    }

    /// Generates a test that plans `$num_expr` equality predicates joined by
    /// `OR` and checks that planning succeeds.
    macro_rules! test_stack_overflow {
        ($num_expr:expr) => {
            paste::item! {
                #[test]
                fn [<test_stack_overflow_ $num_expr>]() {
                    let schema = DFSchema::empty();
                    let mut planner_context = PlannerContext::default();

                    let predicates: Vec<String> = (0..$num_expr)
                        .map(|i| format!("column1 = 'value{:?}'", i))
                        .collect();
                    let expr_str = predicates.join(" OR ");

                    let dialect = GenericDialect {};
                    let sql_expr = Parser::new(&dialect)
                        .try_with_sql(expr_str.as_str())
                        .unwrap()
                        .parse_expr()
                        .unwrap();

                    let context_provider = TestContextProvider::new();
                    let sql_to_rel = SqlToRel::new(&context_provider);

                    sql_to_rel
                        .sql_expr_to_logical_expr(sql_expr, &schema, &mut planner_context)
                        .unwrap();
                }
            }
        };
    }

    test_stack_overflow!(64);
    test_stack_overflow!(128);
    test_stack_overflow!(256);
    test_stack_overflow!(512);
    test_stack_overflow!(1024);
    test_stack_overflow!(2048);
    test_stack_overflow!(4096);
    test_stack_overflow!(8192);

    /// An aliased SQL expression must be planned into an `Expr::Alias`.
    #[test]
    fn test_sql_to_expr_with_alias() {
        let schema = DFSchema::empty();
        let mut planner_context = PlannerContext::default();

        let expr_str = "SUM(int_col) as sum_int_col";

        let dialect = GenericDialect {};
        let sql_expr = Parser::new(&dialect)
            .try_with_sql(expr_str)
            .unwrap()
            .parse_expr_with_alias()
            .unwrap();

        let context_provider = TestContextProvider::new();
        let sql_to_rel = SqlToRel::new(&context_provider);

        let expr = sql_to_rel
            .sql_expr_to_logical_expr_with_alias(sql_expr, &schema, &mut planner_context)
            .unwrap();

        assert!(matches!(expr, Expr::Alias(_)));
    }
}