1use super::{DialectImpl, DialectType};
15use crate::error::Result;
16use crate::expressions::{
17 CeilFunc, CurrentTimestamp, DataType, DateTimeField, Expression, ExtractFunc, Function,
18 Literal, StructField, UnaryFunc, VarArgFunc,
19};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Spark SQL dialect.
///
/// A stateless marker type: its `DialectImpl` implementation supplies the
/// tokenizer settings, the generator settings and the expression rewrites
/// used when targeting `DialectType::Spark`.
pub struct SparkDialect;
25
26impl DialectImpl for SparkDialect {
    /// Identifies this implementation as the Spark dialect.
    fn dialect_type(&self) -> DialectType {
        DialectType::Spark
    }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 let mut config = TokenizerConfig::default();
33 config.identifiers.clear();
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.string_escapes.push('\\');
40 config
42 .keywords
43 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
44 config
45 .keywords
46 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
47 config
48 .keywords
49 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
50 config
52 .numeric_literals
53 .insert("L".to_string(), "BIGINT".to_string());
54 config
55 .numeric_literals
56 .insert("S".to_string(), "SMALLINT".to_string());
57 config
58 .numeric_literals
59 .insert("Y".to_string(), "TINYINT".to_string());
60 config
61 .numeric_literals
62 .insert("D".to_string(), "DOUBLE".to_string());
63 config
64 .numeric_literals
65 .insert("F".to_string(), "FLOAT".to_string());
66 config
67 .numeric_literals
68 .insert("BD".to_string(), "DECIMAL".to_string());
69 config.identifiers_can_start_with_digit = true;
71 config.string_escapes_allowed_in_raw_strings = false;
74 config
75 }
76
77 fn generator_config(&self) -> GeneratorConfig {
78 use crate::generator::IdentifierQuoteStyle;
79 GeneratorConfig {
80 identifier_quote: '`',
81 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
82 dialect: Some(DialectType::Spark),
83 struct_field_sep: ": ",
85 create_function_return_as: false,
87 alias_post_tablesample: true,
89 tablesample_seed_keyword: "REPEATABLE",
90 join_hints: false,
91 identifiers_can_start_with_digit: true,
92 schema_comment_with_eq: false,
94 ..Default::default()
95 }
96 }
97
98 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
99 match expr {
100 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
102 original_name: None,
103 expressions: vec![f.this, f.expression],
104 inferred_type: None,
105 }))),
106
107 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
109 original_name: None,
110 expressions: vec![f.this, f.expression],
111 inferred_type: None,
112 }))),
113
114 Expression::Cast(mut c) => {
116 c.to = Self::normalize_spark_type(c.to);
117 Ok(Expression::Cast(c))
118 }
119
120 Expression::TryCast(mut c) => {
122 c.to = Self::normalize_spark_type(c.to);
123 Ok(Expression::TryCast(c))
124 }
125
126 Expression::SafeCast(mut c) => {
128 c.to = Self::normalize_spark_type(c.to);
129 Ok(Expression::TryCast(c))
130 }
131
132 Expression::Trim(mut t) => {
135 if !t.sql_standard_syntax && t.characters.is_some() {
136 t.sql_standard_syntax = true;
139 }
140 Ok(Expression::Trim(t))
141 }
142
143 Expression::ILike(op) => Ok(Expression::ILike(op)),
145
146 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
148
149 Expression::Explode(f) => Ok(Expression::Explode(f)),
151
152 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
154
155 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
157 seed: None,
158 lower: None,
159 upper: None,
160 }))),
161
162 Expression::Rand(r) => Ok(Expression::Rand(r)),
164
165 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
167 "CONCAT".to_string(),
168 vec![op.left, op.right],
169 )))),
170
171 Expression::Function(f) => self.transform_function(*f),
175
176 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
178
179 Expression::Parameter(mut p)
181 if p.style == crate::expressions::ParameterStyle::Dollar =>
182 {
183 p.style = crate::expressions::ParameterStyle::DollarBrace;
184 if let Some(idx) = p.index {
186 p.name = Some(idx.to_string());
187 }
188 Ok(Expression::Parameter(p))
189 }
190
191 Expression::JSONExtract(je) if je.variant_extract.is_some() => {
193 let path = match *je.expression {
195 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
196 let Literal::String(s) = lit.as_ref() else {
197 unreachable!()
198 };
199 Expression::Literal(Box::new(Literal::String(format!("$.{}", s))))
200 }
201 other => other,
202 };
203 Ok(Expression::Function(Box::new(Function::new(
204 "GET_JSON_OBJECT".to_string(),
205 vec![*je.this, path],
206 ))))
207 }
208
209 _ => Ok(expr),
211 }
212 }
213}
214
215impl SparkDialect {
216 fn normalize_spark_type(dt: DataType) -> DataType {
221 match dt {
222 DataType::VarChar { length: None, .. }
223 | DataType::Char { length: None }
224 | DataType::Text => DataType::Custom {
225 name: "STRING".to_string(),
226 },
227 DataType::VarChar { .. } | DataType::Char { .. } => dt,
229 DataType::Struct { fields, nested } => {
231 let normalized_fields: Vec<StructField> = fields
232 .into_iter()
233 .map(|mut f| {
234 f.data_type = Self::normalize_spark_type(f.data_type);
235 f
236 })
237 .collect();
238 DataType::Struct {
239 fields: normalized_fields,
240 nested,
241 }
242 }
243 _ => dt,
244 }
245 }
246
247 fn transform_function(&self, f: Function) -> Result<Expression> {
248 let name_upper = f.name.to_uppercase();
249 match name_upper.as_str() {
250 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
252 original_name: None,
253 expressions: f.args,
254 inferred_type: None,
255 }))),
256
257 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
259 original_name: None,
260 expressions: f.args,
261 inferred_type: None,
262 }))),
263
264 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
266 original_name: None,
267 expressions: f.args,
268 inferred_type: None,
269 }))),
270
271 "GROUP_CONCAT" if !f.args.is_empty() => {
274 Ok(Expression::Function(Box::new(Function::new(
276 "COLLECT_LIST".to_string(),
277 f.args,
278 ))))
279 }
280
281 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
284 Function::new("COLLECT_LIST".to_string(), f.args),
285 ))),
286
287 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
289 "COLLECT_LIST".to_string(),
290 f.args,
291 )))),
292
293 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
295
296 "LENGTH" => Ok(Expression::Function(Box::new(f))),
298
299 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
301 f.args.into_iter().next().unwrap(),
302 )))),
303
304 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
306 seed: None,
307 lower: None,
308 upper: None,
309 }))),
310
311 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
313 seed: None,
314 lower: None,
315 upper: None,
316 }))),
317
318 "NOW" => Ok(Expression::CurrentTimestamp(
320 crate::expressions::CurrentTimestamp {
321 precision: None,
322 sysdate: false,
323 },
324 )),
325
326 "GETDATE" => Ok(Expression::CurrentTimestamp(
328 crate::expressions::CurrentTimestamp {
329 precision: None,
330 sysdate: false,
331 },
332 )),
333
334 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
336 crate::expressions::CurrentTimestamp {
337 precision: None,
338 sysdate: false,
339 },
340 )),
341
342 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
344
345 "TO_DATE" if f.args.len() == 2 => {
347 let is_default_format = matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(s) if s == "yyyy-MM-dd"));
348 if is_default_format {
349 Ok(Expression::Function(Box::new(Function::new(
350 "TO_DATE".to_string(),
351 vec![f.args.into_iter().next().unwrap()],
352 ))))
353 } else {
354 Ok(Expression::Function(Box::new(f)))
355 }
356 }
357 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
358
359 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
361
362 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
364
365 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
367 "DATE_FORMAT".to_string(),
368 f.args,
369 )))),
370
371 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
373 "DATE_FORMAT".to_string(),
374 f.args,
375 )))),
376
377 "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
379
380 "TRUNC" => Ok(Expression::Function(Box::new(f))),
382
383 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
385
386 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
388 "EXTRACT".to_string(),
389 f.args,
390 )))),
391
392 "UNIX_TIMESTAMP" => {
395 if f.args.is_empty() {
396 Ok(Expression::Function(Box::new(Function::new(
397 "UNIX_TIMESTAMP".to_string(),
398 vec![Expression::CurrentTimestamp(CurrentTimestamp {
399 precision: None,
400 sysdate: false,
401 })],
402 ))))
403 } else {
404 Ok(Expression::Function(Box::new(f)))
405 }
406 }
407
408 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
410
411 "STR_TO_MAP" => {
414 if f.args.len() == 1 {
415 let mut args = f.args;
416 args.push(Expression::Literal(Box::new(
417 crate::expressions::Literal::String(",".to_string()),
418 )));
419 args.push(Expression::Literal(Box::new(
420 crate::expressions::Literal::String(":".to_string()),
421 )));
422 Ok(Expression::Function(Box::new(Function::new(
423 "STR_TO_MAP".to_string(),
424 args,
425 ))))
426 } else {
427 Ok(Expression::Function(Box::new(f)))
428 }
429 }
430
431 "POSITION" => Ok(Expression::Function(Box::new(f))),
433
434 "LOCATE" => Ok(Expression::Function(Box::new(f))),
436
437 "STRPOS" if f.args.len() == 2 => {
439 let mut args = f.args;
440 let first = args.remove(0);
441 let second = args.remove(0);
442 Ok(Expression::Function(Box::new(Function::new(
444 "LOCATE".to_string(),
445 vec![second, first],
446 ))))
447 }
448
449 "CHARINDEX" if f.args.len() >= 2 => {
451 let mut args = f.args;
452 let substring = args.remove(0);
453 let string = args.remove(0);
454 let mut locate_args = vec![substring, string];
455 if !args.is_empty() {
456 locate_args.push(args.remove(0));
457 }
458 Ok(Expression::Function(Box::new(Function::new(
459 "LOCATE".to_string(),
460 locate_args,
461 ))))
462 }
463
464 "INSTR" => Ok(Expression::Function(Box::new(f))),
466
467 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
469 this: f.args.into_iter().next().unwrap(),
470 decimals: None,
471 to: None,
472 }))),
473
474 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
476 this: f.args.into_iter().next().unwrap(),
477 decimals: None,
478 to: None,
479 }))),
480
481 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
483 "EXPLODE".to_string(),
484 f.args,
485 )))),
486
487 "FLATTEN" => Ok(Expression::Function(Box::new(f))),
489
490 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
492 "COLLECT_LIST".to_string(),
493 f.args,
494 )))),
495
496 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
498
499 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
501
502 "ARRAY_LENGTH" | "CARDINALITY" => Ok(Expression::Function(Box::new(Function::new(
504 "SIZE".to_string(),
505 f.args,
506 )))),
507
508 "SIZE" => Ok(Expression::Function(Box::new(f))),
510
511 "SPLIT" => Ok(Expression::Function(Box::new(f))),
513
514 "REGEXP_REPLACE" if f.args.len() > 4 => {
517 let mut args = f.args;
518 args.truncate(4);
519 Ok(Expression::Function(Box::new(Function::new(
520 "REGEXP_REPLACE".to_string(),
521 args,
522 ))))
523 }
524 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
525
526 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
528
529 "REGEXP_EXTRACT_ALL" => Ok(Expression::Function(Box::new(f))),
531
532 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
534 "RLIKE".to_string(),
535 f.args,
536 )))),
537
538 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
540 "GET_JSON_OBJECT".to_string(),
541 f.args,
542 )))),
543
544 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
546 "GET_JSON_OBJECT".to_string(),
547 f.args,
548 )))),
549
550 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
552
553 "FROM_JSON" => Ok(Expression::Function(Box::new(f))),
555
556 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
558
559 "PARSE_JSON" if f.args.len() == 1 => Ok(f.args.into_iter().next().unwrap()),
561 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
562 "FROM_JSON".to_string(),
563 f.args,
564 )))),
565
566 "DATEDIFF" | "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
568 "DATEDIFF".to_string(),
569 f.args,
570 )))),
571
572 "DATE_ADD" | "DATEADD" => Ok(Expression::Function(Box::new(Function::new(
574 "DATE_ADD".to_string(),
575 f.args,
576 )))),
577
578 "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
580
581 "TIMESTAMPADD" => Ok(Expression::Function(Box::new(f))),
583
584 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(f))),
586
587 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
589
590 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
592
593 "NVL" => Ok(Expression::Function(Box::new(f))),
595
596 "NVL2" => Ok(Expression::Function(Box::new(f))),
598
599 "MAP" => Ok(Expression::Function(Box::new(f))),
601
602 "ARRAY" => Ok(Expression::Function(Box::new(f))),
604
605 "ROW" => Ok(Expression::Function(Box::new(Function::new(
607 "STRUCT".to_string(),
608 f.args,
609 )))),
610
611 "STRUCT" => {
613 let mut col_idx = 1usize;
614 let named_args: Vec<Expression> = f
615 .args
616 .into_iter()
617 .map(|arg| {
618 let current_idx = col_idx;
619 col_idx += 1;
620 match &arg {
622 Expression::Alias(_) => arg, Expression::Star(_) => arg, Expression::Column(c) if c.table.is_none() => {
625 let name = c.name.name.clone();
627 Expression::Alias(Box::new(crate::expressions::Alias {
628 this: arg,
629 alias: crate::expressions::Identifier::new(&name),
630 column_aliases: Vec::new(),
631 alias_explicit_as: false,
632 alias_keyword: None,
633 pre_alias_comments: Vec::new(),
634 trailing_comments: Vec::new(),
635 inferred_type: None,
636 }))
637 }
638 _ => {
639 let name = format!("col{}", current_idx);
641 Expression::Alias(Box::new(crate::expressions::Alias {
642 this: arg,
643 alias: crate::expressions::Identifier::new(&name),
644 column_aliases: Vec::new(),
645 alias_explicit_as: false,
646 alias_keyword: None,
647 pre_alias_comments: Vec::new(),
648 trailing_comments: Vec::new(),
649 inferred_type: None,
650 }))
651 }
652 }
653 })
654 .collect();
655 Ok(Expression::Function(Box::new(Function {
656 name: "STRUCT".to_string(),
657 args: named_args,
658 distinct: false,
659 trailing_comments: Vec::new(),
660 use_bracket_syntax: false,
661 no_parens: false,
662 quoted: false,
663 span: None,
664 inferred_type: None,
665 })))
666 }
667
668 "NAMED_STRUCT" if f.args.len() % 2 == 0 => {
670 let original_args = f.args.clone();
671 let mut struct_args = Vec::new();
672 for pair in f.args.chunks(2) {
673 if let Expression::Literal(lit) = &pair[0] {
674 if let Literal::String(field_name) = lit.as_ref() {
675 struct_args.push(Expression::Alias(Box::new(
676 crate::expressions::Alias {
677 this: pair[1].clone(),
678 alias: crate::expressions::Identifier::new(field_name),
679 column_aliases: Vec::new(),
680 alias_explicit_as: false,
681 alias_keyword: None,
682 pre_alias_comments: Vec::new(),
683 trailing_comments: Vec::new(),
684 inferred_type: None,
685 },
686 )));
687 continue;
688 }
689 }
690 return Ok(Expression::Function(Box::new(Function::new(
691 "NAMED_STRUCT".to_string(),
692 original_args,
693 ))));
694 }
695 Ok(Expression::Function(Box::new(Function::new(
696 "STRUCT".to_string(),
697 struct_args,
698 ))))
699 }
700
701 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
703
704 "MAP_FROM_ARRAYS" => Ok(Expression::Function(Box::new(f))),
706
707 "ARRAY_SORT" => Ok(Expression::Function(Box::new(f))),
709
710 "ARRAY_DISTINCT" => Ok(Expression::Function(Box::new(f))),
712
713 "ARRAY_UNION" => Ok(Expression::Function(Box::new(f))),
715
716 "ARRAY_INTERSECT" => Ok(Expression::Function(Box::new(f))),
718
719 "ARRAY_EXCEPT" => Ok(Expression::Function(Box::new(f))),
721
722 "ARRAY_CONTAINS" => Ok(Expression::Function(Box::new(f))),
724
725 "ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
727
728 "TRY_ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
730
731 "TRANSFORM" => Ok(Expression::Function(Box::new(f))),
733
734 "FILTER" => Ok(Expression::Function(Box::new(f))),
736
737 "AGGREGATE" => Ok(Expression::Function(Box::new(f))),
739
740 "SEQUENCE" => Ok(Expression::Function(Box::new(f))),
742
743 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
745 "SEQUENCE".to_string(),
746 f.args,
747 )))),
748
749 "STARTSWITH" | "STARTS_WITH" => Ok(Expression::Function(Box::new(Function::new(
751 "STARTSWITH".to_string(),
752 f.args,
753 )))),
754
755 "ENDSWITH" | "ENDS_WITH" => Ok(Expression::Function(Box::new(Function::new(
757 "ENDSWITH".to_string(),
758 f.args,
759 )))),
760
761 "ARRAY_CONSTRUCT_COMPACT" => {
763 let inner =
764 Expression::Function(Box::new(Function::new("ARRAY".to_string(), f.args)));
765 Ok(Expression::Function(Box::new(Function::new(
766 "ARRAY_COMPACT".to_string(),
767 vec![inner],
768 ))))
769 }
770
771 "ARRAY_TO_STRING" => Ok(Expression::Function(Box::new(Function::new(
773 "ARRAY_JOIN".to_string(),
774 f.args,
775 )))),
776
777 "TO_ARRAY" if f.args.len() == 1 => {
779 let x = f.args[0].clone();
780 match &x {
783 Expression::ArrayFunc(arr) => {
784 Ok(Expression::Function(Box::new(Function::new(
786 "ARRAY".to_string(),
787 arr.expressions.clone(),
788 ))))
789 }
790 _ => Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
791 condition: Expression::IsNull(Box::new(crate::expressions::IsNull {
792 this: x.clone(),
793 not: false,
794 postfix_form: false,
795 })),
796 true_value: Expression::Null(crate::expressions::Null),
797 false_value: Some(Expression::Function(Box::new(Function::new(
798 "ARRAY".to_string(),
799 vec![x],
800 )))),
801 original_name: Some("IF".to_string()),
802 inferred_type: None,
803 }))),
804 }
805 }
806
807 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
809 let subject = f.args[0].clone();
810 let pattern = f.args[1].clone();
811 let group = if f.args.len() >= 6 {
814 let g = &f.args[5];
815 if matches!(g, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"))
817 {
818 None
819 } else {
820 Some(g.clone())
821 }
822 } else {
823 None
824 };
825 let mut args = vec![subject, pattern];
826 if let Some(g) = group {
827 args.push(g);
828 }
829 Ok(Expression::Function(Box::new(Function::new(
830 "REGEXP_EXTRACT".to_string(),
831 args,
832 ))))
833 }
834
835 "UUID_STRING" => {
837 if f.args.is_empty() {
838 Ok(Expression::Function(Box::new(Function::new(
839 "UUID".to_string(),
840 vec![],
841 ))))
842 } else {
843 Ok(Expression::Function(Box::new(Function::new(
844 "UUID_STRING".to_string(),
845 f.args,
846 ))))
847 }
848 }
849
850 "OBJECT_CONSTRUCT" if f.args.len() >= 2 && f.args.len() % 2 == 0 => {
852 let mut struct_args = Vec::new();
856 for pair in f.args.chunks(2) {
857 if let Expression::Literal(lit) = &pair[0] {
858 if let Literal::String(key) = lit.as_ref() {
859 struct_args.push(Expression::Alias(Box::new(
860 crate::expressions::Alias {
861 this: pair[1].clone(),
862 alias: crate::expressions::Identifier::new(key.clone()),
863 column_aliases: vec![],
864 alias_explicit_as: false,
865 alias_keyword: None,
866 pre_alias_comments: vec![],
867 trailing_comments: vec![],
868 inferred_type: None,
869 },
870 )));
871 }
872 } else {
873 struct_args.push(pair[1].clone());
874 }
875 }
876 Ok(Expression::Function(Box::new(Function::new(
877 "STRUCT".to_string(),
878 struct_args,
879 ))))
880 }
881
882 "DATE_PART" if f.args.len() == 2 => {
884 let mut args = f.args;
885 let part = args.remove(0);
886 let expr = args.remove(0);
887 if let Some(field) = expr_to_datetime_field(&part) {
888 Ok(Expression::Extract(Box::new(ExtractFunc {
889 this: expr,
890 field,
891 })))
892 } else {
893 Ok(Expression::Function(Box::new(Function::new(
895 "DATE_PART".to_string(),
896 vec![part, expr],
897 ))))
898 }
899 }
900
901 "GET_PATH" if f.args.len() == 2 => {
903 let mut args = f.args;
904 let this = args.remove(0);
905 let path = args.remove(0);
906 let json_path = match &path {
907 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
908 let Literal::String(s) = lit.as_ref() else {
909 unreachable!()
910 };
911 let normalized = if s.starts_with('$') {
912 s.clone()
913 } else if s.starts_with('[') {
914 format!("${}", s)
915 } else {
916 format!("$.{}", s)
917 };
918 Expression::Literal(Box::new(Literal::String(normalized)))
919 }
920 _ => path,
921 };
922 Ok(Expression::Function(Box::new(Function::new(
923 "GET_JSON_OBJECT".to_string(),
924 vec![this, json_path],
925 ))))
926 }
927
928 "BITWISE_LEFT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
930 "SHIFTLEFT".to_string(),
931 f.args,
932 )))),
933
934 "BITWISE_RIGHT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
936 "SHIFTRIGHT".to_string(),
937 f.args,
938 )))),
939
940 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
942 "APPROX_COUNT_DISTINCT".to_string(),
943 f.args,
944 )))),
945
946 "ARRAY_SLICE" => Ok(Expression::Function(Box::new(Function::new(
948 "SLICE".to_string(),
949 f.args,
950 )))),
951
952 "DATE_FROM_PARTS" => Ok(Expression::Function(Box::new(Function::new(
954 "MAKE_DATE".to_string(),
955 f.args,
956 )))),
957
958 "DAYOFWEEK_ISO" => Ok(Expression::Function(Box::new(Function::new(
960 "DAYOFWEEK".to_string(),
961 f.args,
962 )))),
963
964 "FORMAT" => Ok(Expression::Function(Box::new(Function::new(
966 "FORMAT_STRING".to_string(),
967 f.args,
968 )))),
969
970 "LOGICAL_AND" => Ok(Expression::Function(Box::new(Function::new(
972 "BOOL_AND".to_string(),
973 f.args,
974 )))),
975
976 "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
978 "VAR_POP".to_string(),
979 f.args,
980 )))),
981
982 "WEEK_OF_YEAR" => Ok(Expression::Function(Box::new(Function::new(
984 "WEEKOFYEAR".to_string(),
985 f.args,
986 )))),
987
988 "BIT_GET" => Ok(Expression::Function(Box::new(Function::new(
990 "GETBIT".to_string(),
991 f.args,
992 )))),
993
994 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
996
997 _ => Ok(Expression::Function(Box::new(f))),
999 }
1000 }
1001
1002 fn transform_aggregate_function(
1003 &self,
1004 f: Box<crate::expressions::AggregateFunction>,
1005 ) -> Result<Expression> {
1006 let name_upper = f.name.to_uppercase();
1007 match name_upper.as_str() {
1008 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1010 Function::new("COLLECT_LIST".to_string(), f.args),
1011 ))),
1012
1013 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1015 Function::new("COLLECT_LIST".to_string(), f.args),
1016 ))),
1017
1018 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
1020 "COLLECT_LIST".to_string(),
1021 f.args,
1022 )))),
1023
1024 "ARRAY_AGG" if !f.args.is_empty() => {
1026 let mut af = f;
1027 af.name = "COLLECT_LIST".to_string();
1028 Ok(Expression::AggregateFunction(af))
1029 }
1030
1031 "LOGICAL_OR" if !f.args.is_empty() => {
1033 let mut af = f;
1034 af.name = "BOOL_OR".to_string();
1035 Ok(Expression::AggregateFunction(af))
1036 }
1037
1038 _ => Ok(Expression::AggregateFunction(f)),
1040 }
1041 }
1042}
1043
1044fn expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
1046 let name = match expr {
1047 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1048 let Literal::String(s) = lit.as_ref() else {
1049 unreachable!()
1050 };
1051 s.to_uppercase()
1052 }
1053 Expression::Identifier(id) => id.name.to_uppercase(),
1054 Expression::Var(v) => v.this.to_uppercase(),
1055 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1056 _ => return None,
1057 };
1058 match name.as_str() {
1059 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
1060 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
1061 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
1062 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
1063 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
1064 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
1065 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
1066 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
1067 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
1068 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
1069 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
1070 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
1071 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
1072 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
1073 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
1074 _ => Some(DateTimeField::Custom(name)),
1075 }
1076}