1use super::{DialectImpl, DialectType};
15use crate::error::Result;
16use crate::expressions::{
17 CeilFunc, CurrentTimestamp, DataType, DateTimeField, Expression, ExtractFunc, Function,
18 Literal, StructField, UnaryFunc, VarArgFunc,
19};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Marker type for the Spark SQL dialect.
///
/// The type carries no data. Its behaviour comes from the `DialectImpl`
/// implementation and from the private helper methods defined on it in this
/// file.
pub struct SparkDialect;
25
26impl DialectImpl for SparkDialect {
    /// Identifies this dialect as `DialectType::Spark`.
    fn dialect_type(&self) -> DialectType {
        DialectType::Spark
    }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 let mut config = TokenizerConfig::default();
33 config.identifiers.clear();
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.string_escapes.push('\\');
40 config
42 .keywords
43 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
44 config
45 .keywords
46 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
47 config
48 .keywords
49 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
50 config
52 .numeric_literals
53 .insert("L".to_string(), "BIGINT".to_string());
54 config
55 .numeric_literals
56 .insert("S".to_string(), "SMALLINT".to_string());
57 config
58 .numeric_literals
59 .insert("Y".to_string(), "TINYINT".to_string());
60 config
61 .numeric_literals
62 .insert("D".to_string(), "DOUBLE".to_string());
63 config
64 .numeric_literals
65 .insert("F".to_string(), "FLOAT".to_string());
66 config
67 .numeric_literals
68 .insert("BD".to_string(), "DECIMAL".to_string());
69 config.identifiers_can_start_with_digit = true;
71 config.string_escapes_allowed_in_raw_strings = false;
74 config
75 }
76
77 fn generator_config(&self) -> GeneratorConfig {
78 use crate::generator::IdentifierQuoteStyle;
79 GeneratorConfig {
80 identifier_quote: '`',
81 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
82 dialect: Some(DialectType::Spark),
83 struct_field_sep: ": ",
85 create_function_return_as: false,
87 alias_post_tablesample: true,
89 tablesample_seed_keyword: "REPEATABLE",
90 join_hints: false,
91 identifiers_can_start_with_digit: true,
92 schema_comment_with_eq: false,
94 ..Default::default()
95 }
96 }
97
98 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
99 match expr {
100 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
102 original_name: None,
103 expressions: vec![f.this, f.expression],
104 inferred_type: None,
105 }))),
106
107 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
109 original_name: None,
110 expressions: vec![f.this, f.expression],
111 inferred_type: None,
112 }))),
113
114 Expression::Cast(mut c) => {
116 c.to = Self::normalize_spark_type(c.to);
117 Ok(Expression::Cast(c))
118 }
119
120 Expression::TryCast(mut c) => {
122 c.to = Self::normalize_spark_type(c.to);
123 Ok(Expression::TryCast(c))
124 }
125
126 Expression::SafeCast(mut c) => {
128 c.to = Self::normalize_spark_type(c.to);
129 Ok(Expression::TryCast(c))
130 }
131
132 Expression::Trim(mut t) => {
135 if !t.sql_standard_syntax && t.characters.is_some() {
136 t.sql_standard_syntax = true;
139 }
140 Ok(Expression::Trim(t))
141 }
142
143 Expression::ILike(op) => Ok(Expression::ILike(op)),
145
146 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
148
149 Expression::Explode(f) => Ok(Expression::Explode(f)),
151
152 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
154
155 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
157 seed: None,
158 lower: None,
159 upper: None,
160 }))),
161
162 Expression::Rand(r) => Ok(Expression::Rand(r)),
164
165 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
167 "CONCAT".to_string(),
168 vec![op.left, op.right],
169 )))),
170
171 Expression::Function(f) => self.transform_function(*f),
175
176 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
178
179 Expression::Parameter(mut p)
181 if p.style == crate::expressions::ParameterStyle::Dollar =>
182 {
183 p.style = crate::expressions::ParameterStyle::DollarBrace;
184 if let Some(idx) = p.index {
186 p.name = Some(idx.to_string());
187 }
188 Ok(Expression::Parameter(p))
189 }
190
191 Expression::JSONExtract(je) if je.variant_extract.is_some() => {
193 let path = match *je.expression {
195 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
196 let Literal::String(s) = lit.as_ref() else {
197 unreachable!()
198 };
199 Expression::Literal(Box::new(Literal::String(format!("$.{}", s))))
200 }
201 other => other,
202 };
203 Ok(Expression::Function(Box::new(Function::new(
204 "GET_JSON_OBJECT".to_string(),
205 vec![*je.this, path],
206 ))))
207 }
208
209 _ => Ok(expr),
211 }
212 }
213}
214
215impl SparkDialect {
216 fn normalize_spark_type(dt: DataType) -> DataType {
221 match dt {
222 DataType::VarChar { length: None, .. }
223 | DataType::Char { length: None }
224 | DataType::Text => DataType::Custom {
225 name: "STRING".to_string(),
226 },
227 DataType::VarChar { .. } | DataType::Char { .. } => dt,
229 DataType::Struct { fields, nested } => {
231 let normalized_fields: Vec<StructField> = fields
232 .into_iter()
233 .map(|mut f| {
234 f.data_type = Self::normalize_spark_type(f.data_type);
235 f
236 })
237 .collect();
238 DataType::Struct {
239 fields: normalized_fields,
240 nested,
241 }
242 }
243 _ => dt,
244 }
245 }
246
247 fn transform_function(&self, f: Function) -> Result<Expression> {
248 let name_upper = f.name.to_uppercase();
249 match name_upper.as_str() {
250 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
252 original_name: None,
253 expressions: f.args,
254 inferred_type: None,
255 }))),
256
257 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
259 original_name: None,
260 expressions: f.args,
261 inferred_type: None,
262 }))),
263
264 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
266 original_name: None,
267 expressions: f.args,
268 inferred_type: None,
269 }))),
270
271 "GROUP_CONCAT" if !f.args.is_empty() => {
274 Ok(Expression::Function(Box::new(Function::new(
276 "COLLECT_LIST".to_string(),
277 f.args,
278 ))))
279 }
280
281 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
284 Function::new("COLLECT_LIST".to_string(), f.args),
285 ))),
286
287 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
289 "COLLECT_LIST".to_string(),
290 f.args,
291 )))),
292
293 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
295
296 "LENGTH" => Ok(Expression::Function(Box::new(f))),
298
299 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
301 f.args.into_iter().next().unwrap(),
302 )))),
303
304 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
306 seed: None,
307 lower: None,
308 upper: None,
309 }))),
310
311 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
313 seed: None,
314 lower: None,
315 upper: None,
316 }))),
317
318 "NOW" => Ok(Expression::CurrentTimestamp(
320 crate::expressions::CurrentTimestamp {
321 precision: None,
322 sysdate: false,
323 },
324 )),
325
326 "GETDATE" => Ok(Expression::CurrentTimestamp(
328 crate::expressions::CurrentTimestamp {
329 precision: None,
330 sysdate: false,
331 },
332 )),
333
334 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
336 crate::expressions::CurrentTimestamp {
337 precision: None,
338 sysdate: false,
339 },
340 )),
341
342 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
344
345 "TO_DATE" if f.args.len() == 2 => {
347 let is_default_format = matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(s) if s == "yyyy-MM-dd"));
348 if is_default_format {
349 Ok(Expression::Function(Box::new(Function::new(
350 "TO_DATE".to_string(),
351 vec![f.args.into_iter().next().unwrap()],
352 ))))
353 } else {
354 Ok(Expression::Function(Box::new(f)))
355 }
356 }
357 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
358
359 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
361
362 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
364
365 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
367 "DATE_FORMAT".to_string(),
368 f.args,
369 )))),
370
371 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
373 "DATE_FORMAT".to_string(),
374 f.args,
375 )))),
376
377 "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
379
380 "TRUNC" => Ok(Expression::Function(Box::new(f))),
382
383 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
385
386 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
388 "EXTRACT".to_string(),
389 f.args,
390 )))),
391
392 "UNIX_TIMESTAMP" => {
395 if f.args.is_empty() {
396 Ok(Expression::Function(Box::new(Function::new(
397 "UNIX_TIMESTAMP".to_string(),
398 vec![Expression::CurrentTimestamp(CurrentTimestamp {
399 precision: None,
400 sysdate: false,
401 })],
402 ))))
403 } else {
404 Ok(Expression::Function(Box::new(f)))
405 }
406 }
407
408 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
410
411 "STR_TO_MAP" => {
414 if f.args.len() == 1 {
415 let mut args = f.args;
416 args.push(Expression::Literal(Box::new(
417 crate::expressions::Literal::String(",".to_string()),
418 )));
419 args.push(Expression::Literal(Box::new(
420 crate::expressions::Literal::String(":".to_string()),
421 )));
422 Ok(Expression::Function(Box::new(Function::new(
423 "STR_TO_MAP".to_string(),
424 args,
425 ))))
426 } else {
427 Ok(Expression::Function(Box::new(f)))
428 }
429 }
430
431 "POSITION" => Ok(Expression::Function(Box::new(f))),
433
434 "LOCATE" => Ok(Expression::Function(Box::new(f))),
436
437 "STRPOS" if f.args.len() == 2 => {
439 let mut args = f.args;
440 let first = args.remove(0);
441 let second = args.remove(0);
442 Ok(Expression::Function(Box::new(Function::new(
444 "LOCATE".to_string(),
445 vec![second, first],
446 ))))
447 }
448
449 "CHARINDEX" if f.args.len() >= 2 => {
451 let mut args = f.args;
452 let substring = args.remove(0);
453 let string = args.remove(0);
454 let mut locate_args = vec![substring, string];
455 if !args.is_empty() {
456 locate_args.push(args.remove(0));
457 }
458 Ok(Expression::Function(Box::new(Function::new(
459 "LOCATE".to_string(),
460 locate_args,
461 ))))
462 }
463
464 "INSTR" => Ok(Expression::Function(Box::new(f))),
466
467 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
469 this: f.args.into_iter().next().unwrap(),
470 decimals: None,
471 to: None,
472 }))),
473
474 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
476 this: f.args.into_iter().next().unwrap(),
477 decimals: None,
478 to: None,
479 }))),
480
481 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
483 "EXPLODE".to_string(),
484 f.args,
485 )))),
486
487 "FLATTEN" => Ok(Expression::Function(Box::new(f))),
489
490 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
492 "COLLECT_LIST".to_string(),
493 f.args,
494 )))),
495
496 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
498
499 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
501
502 "ARRAY_LENGTH" | "CARDINALITY" => Ok(Expression::Function(Box::new(Function::new(
504 "SIZE".to_string(),
505 f.args,
506 )))),
507
508 "SIZE" => Ok(Expression::Function(Box::new(f))),
510
511 "SPLIT" => Ok(Expression::Function(Box::new(f))),
513
514 "REGEXP_REPLACE" if f.args.len() > 4 => {
517 let mut args = f.args;
518 args.truncate(4);
519 Ok(Expression::Function(Box::new(Function::new(
520 "REGEXP_REPLACE".to_string(),
521 args,
522 ))))
523 }
524 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
525
526 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
528
529 "REGEXP_EXTRACT_ALL" => Ok(Expression::Function(Box::new(f))),
531
532 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
534 "RLIKE".to_string(),
535 f.args,
536 )))),
537
538 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
540 "GET_JSON_OBJECT".to_string(),
541 f.args,
542 )))),
543
544 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
546 "GET_JSON_OBJECT".to_string(),
547 f.args,
548 )))),
549
550 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
552
553 "FROM_JSON" => Ok(Expression::Function(Box::new(f))),
555
556 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
558
559 "PARSE_JSON" if f.args.len() == 1 => Ok(f.args.into_iter().next().unwrap()),
561 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
562 "FROM_JSON".to_string(),
563 f.args,
564 )))),
565
566 "DATEDIFF" | "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
568 "DATEDIFF".to_string(),
569 f.args,
570 )))),
571
572 "DATE_ADD" | "DATEADD" => Ok(Expression::Function(Box::new(Function::new(
574 "DATE_ADD".to_string(),
575 f.args,
576 )))),
577
578 "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
580
581 "TIMESTAMPADD" => Ok(Expression::Function(Box::new(f))),
583
584 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(f))),
586
587 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
589
590 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
592
593 "NVL" => Ok(Expression::Function(Box::new(f))),
595
596 "NVL2" => Ok(Expression::Function(Box::new(f))),
598
599 "MAP" => Ok(Expression::Function(Box::new(f))),
601
602 "ARRAY" => Ok(Expression::Function(Box::new(f))),
604
605 "ROW" => Ok(Expression::Function(Box::new(Function::new(
607 "STRUCT".to_string(),
608 f.args,
609 )))),
610
611 "STRUCT" => {
613 let mut col_idx = 1usize;
614 let named_args: Vec<Expression> = f
615 .args
616 .into_iter()
617 .map(|arg| {
618 let current_idx = col_idx;
619 col_idx += 1;
620 match &arg {
622 Expression::Alias(_) => arg, Expression::Star(_) => arg, Expression::Column(c) if c.table.is_none() => {
625 let name = c.name.name.clone();
627 Expression::Alias(Box::new(crate::expressions::Alias {
628 this: arg,
629 alias: crate::expressions::Identifier::new(&name),
630 column_aliases: Vec::new(),
631 pre_alias_comments: Vec::new(),
632 trailing_comments: Vec::new(),
633 inferred_type: None,
634 }))
635 }
636 _ => {
637 let name = format!("col{}", current_idx);
639 Expression::Alias(Box::new(crate::expressions::Alias {
640 this: arg,
641 alias: crate::expressions::Identifier::new(&name),
642 column_aliases: Vec::new(),
643 pre_alias_comments: Vec::new(),
644 trailing_comments: Vec::new(),
645 inferred_type: None,
646 }))
647 }
648 }
649 })
650 .collect();
651 Ok(Expression::Function(Box::new(Function {
652 name: "STRUCT".to_string(),
653 args: named_args,
654 distinct: false,
655 trailing_comments: Vec::new(),
656 use_bracket_syntax: false,
657 no_parens: false,
658 quoted: false,
659 span: None,
660 inferred_type: None,
661 })))
662 }
663
664 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
666
667 "MAP_FROM_ARRAYS" => Ok(Expression::Function(Box::new(f))),
669
670 "ARRAY_SORT" => Ok(Expression::Function(Box::new(f))),
672
673 "ARRAY_DISTINCT" => Ok(Expression::Function(Box::new(f))),
675
676 "ARRAY_UNION" => Ok(Expression::Function(Box::new(f))),
678
679 "ARRAY_INTERSECT" => Ok(Expression::Function(Box::new(f))),
681
682 "ARRAY_EXCEPT" => Ok(Expression::Function(Box::new(f))),
684
685 "ARRAY_CONTAINS" => Ok(Expression::Function(Box::new(f))),
687
688 "ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
690
691 "TRY_ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
693
694 "TRANSFORM" => Ok(Expression::Function(Box::new(f))),
696
697 "FILTER" => Ok(Expression::Function(Box::new(f))),
699
700 "AGGREGATE" => Ok(Expression::Function(Box::new(f))),
702
703 "SEQUENCE" => Ok(Expression::Function(Box::new(f))),
705
706 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
708 "SEQUENCE".to_string(),
709 f.args,
710 )))),
711
712 "STARTSWITH" | "STARTS_WITH" => Ok(Expression::Function(Box::new(Function::new(
714 "STARTSWITH".to_string(),
715 f.args,
716 )))),
717
718 "ENDSWITH" | "ENDS_WITH" => Ok(Expression::Function(Box::new(Function::new(
720 "ENDSWITH".to_string(),
721 f.args,
722 )))),
723
724 "ARRAY_CONSTRUCT_COMPACT" => {
726 let inner =
727 Expression::Function(Box::new(Function::new("ARRAY".to_string(), f.args)));
728 Ok(Expression::Function(Box::new(Function::new(
729 "ARRAY_COMPACT".to_string(),
730 vec![inner],
731 ))))
732 }
733
734 "ARRAY_TO_STRING" => Ok(Expression::Function(Box::new(Function::new(
736 "ARRAY_JOIN".to_string(),
737 f.args,
738 )))),
739
740 "TO_ARRAY" if f.args.len() == 1 => {
742 let x = f.args[0].clone();
743 match &x {
746 Expression::ArrayFunc(arr) => {
747 Ok(Expression::Function(Box::new(Function::new(
749 "ARRAY".to_string(),
750 arr.expressions.clone(),
751 ))))
752 }
753 _ => Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
754 condition: Expression::IsNull(Box::new(crate::expressions::IsNull {
755 this: x.clone(),
756 not: false,
757 postfix_form: false,
758 })),
759 true_value: Expression::Null(crate::expressions::Null),
760 false_value: Some(Expression::Function(Box::new(Function::new(
761 "ARRAY".to_string(),
762 vec![x],
763 )))),
764 original_name: Some("IF".to_string()),
765 inferred_type: None,
766 }))),
767 }
768 }
769
770 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
772 let subject = f.args[0].clone();
773 let pattern = f.args[1].clone();
774 let group = if f.args.len() >= 6 {
777 let g = &f.args[5];
778 if matches!(g, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"))
780 {
781 None
782 } else {
783 Some(g.clone())
784 }
785 } else {
786 None
787 };
788 let mut args = vec![subject, pattern];
789 if let Some(g) = group {
790 args.push(g);
791 }
792 Ok(Expression::Function(Box::new(Function::new(
793 "REGEXP_EXTRACT".to_string(),
794 args,
795 ))))
796 }
797
798 "UUID_STRING" => Ok(Expression::Function(Box::new(Function::new(
800 "UUID".to_string(),
801 vec![],
802 )))),
803
804 "OBJECT_CONSTRUCT" if f.args.len() >= 2 && f.args.len() % 2 == 0 => {
806 let mut struct_args = Vec::new();
810 for pair in f.args.chunks(2) {
811 if let Expression::Literal(lit) = &pair[0] {
812 if let Literal::String(key) = lit.as_ref() {
813 struct_args.push(Expression::Alias(Box::new(
814 crate::expressions::Alias {
815 this: pair[1].clone(),
816 alias: crate::expressions::Identifier::new(key.clone()),
817 column_aliases: vec![],
818 pre_alias_comments: vec![],
819 trailing_comments: vec![],
820 inferred_type: None,
821 },
822 )));
823 }
824 } else {
825 struct_args.push(pair[1].clone());
826 }
827 }
828 Ok(Expression::Function(Box::new(Function::new(
829 "STRUCT".to_string(),
830 struct_args,
831 ))))
832 }
833
834 "DATE_PART" if f.args.len() == 2 => {
836 let mut args = f.args;
837 let part = args.remove(0);
838 let expr = args.remove(0);
839 if let Some(field) = expr_to_datetime_field(&part) {
840 Ok(Expression::Extract(Box::new(ExtractFunc {
841 this: expr,
842 field,
843 })))
844 } else {
845 Ok(Expression::Function(Box::new(Function::new(
847 "DATE_PART".to_string(),
848 vec![part, expr],
849 ))))
850 }
851 }
852
853 "GET_PATH" if f.args.len() == 2 => {
855 let mut args = f.args;
856 let this = args.remove(0);
857 let path = args.remove(0);
858 let json_path = match &path {
859 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
860 let Literal::String(s) = lit.as_ref() else {
861 unreachable!()
862 };
863 let normalized = if s.starts_with('$') {
864 s.clone()
865 } else if s.starts_with('[') {
866 format!("${}", s)
867 } else {
868 format!("$.{}", s)
869 };
870 Expression::Literal(Box::new(Literal::String(normalized)))
871 }
872 _ => path,
873 };
874 Ok(Expression::Function(Box::new(Function::new(
875 "GET_JSON_OBJECT".to_string(),
876 vec![this, json_path],
877 ))))
878 }
879
880 "BITWISE_LEFT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
882 "SHIFTLEFT".to_string(),
883 f.args,
884 )))),
885
886 "BITWISE_RIGHT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
888 "SHIFTRIGHT".to_string(),
889 f.args,
890 )))),
891
892 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
894 "APPROX_COUNT_DISTINCT".to_string(),
895 f.args,
896 )))),
897
898 "ARRAY_SLICE" => Ok(Expression::Function(Box::new(Function::new(
900 "SLICE".to_string(),
901 f.args,
902 )))),
903
904 "DATE_FROM_PARTS" => Ok(Expression::Function(Box::new(Function::new(
906 "MAKE_DATE".to_string(),
907 f.args,
908 )))),
909
910 "DAYOFWEEK_ISO" => Ok(Expression::Function(Box::new(Function::new(
912 "DAYOFWEEK".to_string(),
913 f.args,
914 )))),
915
916 "FORMAT" => Ok(Expression::Function(Box::new(Function::new(
918 "FORMAT_STRING".to_string(),
919 f.args,
920 )))),
921
922 "LOGICAL_AND" => Ok(Expression::Function(Box::new(Function::new(
924 "BOOL_AND".to_string(),
925 f.args,
926 )))),
927
928 "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
930 "VAR_POP".to_string(),
931 f.args,
932 )))),
933
934 "WEEK_OF_YEAR" => Ok(Expression::Function(Box::new(Function::new(
936 "WEEKOFYEAR".to_string(),
937 f.args,
938 )))),
939
940 "BIT_GET" => Ok(Expression::Function(Box::new(Function::new(
942 "GETBIT".to_string(),
943 f.args,
944 )))),
945
946 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
948
949 _ => Ok(Expression::Function(Box::new(f))),
951 }
952 }
953
954 fn transform_aggregate_function(
955 &self,
956 f: Box<crate::expressions::AggregateFunction>,
957 ) -> Result<Expression> {
958 let name_upper = f.name.to_uppercase();
959 match name_upper.as_str() {
960 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
962 Function::new("COLLECT_LIST".to_string(), f.args),
963 ))),
964
965 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
967 Function::new("COLLECT_LIST".to_string(), f.args),
968 ))),
969
970 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
972 "COLLECT_LIST".to_string(),
973 f.args,
974 )))),
975
976 "ARRAY_AGG" if !f.args.is_empty() => {
978 let mut af = f;
979 af.name = "COLLECT_LIST".to_string();
980 Ok(Expression::AggregateFunction(af))
981 }
982
983 "LOGICAL_OR" if !f.args.is_empty() => {
985 let mut af = f;
986 af.name = "BOOL_OR".to_string();
987 Ok(Expression::AggregateFunction(af))
988 }
989
990 _ => Ok(Expression::AggregateFunction(f)),
992 }
993 }
994}
995
996fn expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
998 let name = match expr {
999 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1000 let Literal::String(s) = lit.as_ref() else {
1001 unreachable!()
1002 };
1003 s.to_uppercase()
1004 }
1005 Expression::Identifier(id) => id.name.to_uppercase(),
1006 Expression::Var(v) => v.this.to_uppercase(),
1007 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1008 _ => return None,
1009 };
1010 match name.as_str() {
1011 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
1012 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
1013 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
1014 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
1015 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
1016 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
1017 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
1018 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
1019 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
1020 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
1021 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
1022 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
1023 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
1024 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
1025 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
1026 _ => Some(DateTimeField::Custom(name)),
1027 }
1028}