1use super::{DialectImpl, DialectType};
15use crate::error::Result;
16use crate::expressions::{
17 CeilFunc, CurrentTimestamp, DataType, DateTimeField, Expression, ExtractFunc, Function,
18 Literal, StructField, UnaryFunc, VarArgFunc,
19};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Spark SQL dialect.
///
/// A stateless unit type: it reports `DialectType::Spark` and supplies the tokenizer
/// configuration, generator configuration and expression rewrites through its
/// `DialectImpl` implementation.
pub struct SparkDialect;
25
26impl DialectImpl for SparkDialect {
    /// Identifies this implementation as the Spark dialect.
    fn dialect_type(&self) -> DialectType {
        DialectType::Spark
    }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 let mut config = TokenizerConfig::default();
33 config.identifiers.clear();
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.string_escapes.push('\\');
40 config
42 .keywords
43 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
44 config
46 .numeric_literals
47 .insert("L".to_string(), "BIGINT".to_string());
48 config
49 .numeric_literals
50 .insert("S".to_string(), "SMALLINT".to_string());
51 config
52 .numeric_literals
53 .insert("Y".to_string(), "TINYINT".to_string());
54 config
55 .numeric_literals
56 .insert("D".to_string(), "DOUBLE".to_string());
57 config
58 .numeric_literals
59 .insert("F".to_string(), "FLOAT".to_string());
60 config
61 .numeric_literals
62 .insert("BD".to_string(), "DECIMAL".to_string());
63 config.identifiers_can_start_with_digit = true;
65 config.string_escapes_allowed_in_raw_strings = false;
68 config
69 }
70
71 fn generator_config(&self) -> GeneratorConfig {
72 use crate::generator::IdentifierQuoteStyle;
73 GeneratorConfig {
74 identifier_quote: '`',
75 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
76 dialect: Some(DialectType::Spark),
77 struct_field_sep: ": ",
79 create_function_return_as: false,
81 alias_post_tablesample: true,
83 tablesample_seed_keyword: "REPEATABLE",
84 join_hints: false,
85 identifiers_can_start_with_digit: true,
86 schema_comment_with_eq: false,
88 ..Default::default()
89 }
90 }
91
92 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
93 match expr {
94 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
96 original_name: None,
97 expressions: vec![f.this, f.expression],
98 inferred_type: None,
99 }))),
100
101 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
103 original_name: None,
104 expressions: vec![f.this, f.expression],
105 inferred_type: None,
106 }))),
107
108 Expression::Cast(mut c) => {
110 c.to = Self::normalize_spark_type(c.to);
111 Ok(Expression::Cast(c))
112 }
113
114 Expression::TryCast(mut c) => {
116 c.to = Self::normalize_spark_type(c.to);
117 Ok(Expression::TryCast(c))
118 }
119
120 Expression::SafeCast(mut c) => {
122 c.to = Self::normalize_spark_type(c.to);
123 Ok(Expression::TryCast(c))
124 }
125
126 Expression::Trim(mut t) => {
129 if !t.sql_standard_syntax && t.characters.is_some() {
130 t.sql_standard_syntax = true;
133 }
134 Ok(Expression::Trim(t))
135 }
136
137 Expression::ILike(op) => Ok(Expression::ILike(op)),
139
140 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
142
143 Expression::Explode(f) => Ok(Expression::Explode(f)),
145
146 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
148
149 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
151 seed: None,
152 lower: None,
153 upper: None,
154 }))),
155
156 Expression::Rand(r) => Ok(Expression::Rand(r)),
158
159 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
161 "CONCAT".to_string(),
162 vec![op.left, op.right],
163 )))),
164
165 Expression::Function(f) => self.transform_function(*f),
169
170 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
172
173 Expression::Parameter(mut p)
175 if p.style == crate::expressions::ParameterStyle::Dollar =>
176 {
177 p.style = crate::expressions::ParameterStyle::DollarBrace;
178 if let Some(idx) = p.index {
180 p.name = Some(idx.to_string());
181 }
182 Ok(Expression::Parameter(p))
183 }
184
185 Expression::JSONExtract(je) if je.variant_extract.is_some() => {
187 let path = match *je.expression {
189 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
190 let Literal::String(s) = lit.as_ref() else {
191 unreachable!()
192 };
193 Expression::Literal(Box::new(Literal::String(format!("$.{}", s))))
194 }
195 other => other,
196 };
197 Ok(Expression::Function(Box::new(Function::new(
198 "GET_JSON_OBJECT".to_string(),
199 vec![*je.this, path],
200 ))))
201 }
202
203 _ => Ok(expr),
205 }
206 }
207}
208
209impl SparkDialect {
210 fn normalize_spark_type(dt: DataType) -> DataType {
215 match dt {
216 DataType::VarChar { length: None, .. }
217 | DataType::Char { length: None }
218 | DataType::Text => DataType::Custom {
219 name: "STRING".to_string(),
220 },
221 DataType::VarChar { .. } | DataType::Char { .. } => dt,
223 DataType::Struct { fields, nested } => {
225 let normalized_fields: Vec<StructField> = fields
226 .into_iter()
227 .map(|mut f| {
228 f.data_type = Self::normalize_spark_type(f.data_type);
229 f
230 })
231 .collect();
232 DataType::Struct {
233 fields: normalized_fields,
234 nested,
235 }
236 }
237 _ => dt,
238 }
239 }
240
241 fn transform_function(&self, f: Function) -> Result<Expression> {
242 let name_upper = f.name.to_uppercase();
243 match name_upper.as_str() {
244 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
246 original_name: None,
247 expressions: f.args,
248 inferred_type: None,
249 }))),
250
251 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
253 original_name: None,
254 expressions: f.args,
255 inferred_type: None,
256 }))),
257
258 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
260 original_name: None,
261 expressions: f.args,
262 inferred_type: None,
263 }))),
264
265 "GROUP_CONCAT" if !f.args.is_empty() => {
268 Ok(Expression::Function(Box::new(Function::new(
270 "COLLECT_LIST".to_string(),
271 f.args,
272 ))))
273 }
274
275 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
278 Function::new("COLLECT_LIST".to_string(), f.args),
279 ))),
280
281 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
283 "COLLECT_LIST".to_string(),
284 f.args,
285 )))),
286
287 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
289
290 "LENGTH" => Ok(Expression::Function(Box::new(f))),
292
293 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
295 f.args.into_iter().next().unwrap(),
296 )))),
297
298 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
300 seed: None,
301 lower: None,
302 upper: None,
303 }))),
304
305 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
307 seed: None,
308 lower: None,
309 upper: None,
310 }))),
311
312 "NOW" => Ok(Expression::CurrentTimestamp(
314 crate::expressions::CurrentTimestamp {
315 precision: None,
316 sysdate: false,
317 },
318 )),
319
320 "GETDATE" => Ok(Expression::CurrentTimestamp(
322 crate::expressions::CurrentTimestamp {
323 precision: None,
324 sysdate: false,
325 },
326 )),
327
328 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
330 crate::expressions::CurrentTimestamp {
331 precision: None,
332 sysdate: false,
333 },
334 )),
335
336 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
338
339 "TO_DATE" if f.args.len() == 2 => {
341 let is_default_format = matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(s) if s == "yyyy-MM-dd"));
342 if is_default_format {
343 Ok(Expression::Function(Box::new(Function::new(
344 "TO_DATE".to_string(),
345 vec![f.args.into_iter().next().unwrap()],
346 ))))
347 } else {
348 Ok(Expression::Function(Box::new(f)))
349 }
350 }
351 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
352
353 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
355
356 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
358
359 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
361 "DATE_FORMAT".to_string(),
362 f.args,
363 )))),
364
365 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
367 "DATE_FORMAT".to_string(),
368 f.args,
369 )))),
370
371 "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
373
374 "TRUNC" => Ok(Expression::Function(Box::new(f))),
376
377 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
379
380 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
382 "EXTRACT".to_string(),
383 f.args,
384 )))),
385
386 "UNIX_TIMESTAMP" => {
389 if f.args.is_empty() {
390 Ok(Expression::Function(Box::new(Function::new(
391 "UNIX_TIMESTAMP".to_string(),
392 vec![Expression::CurrentTimestamp(CurrentTimestamp {
393 precision: None,
394 sysdate: false,
395 })],
396 ))))
397 } else {
398 Ok(Expression::Function(Box::new(f)))
399 }
400 }
401
402 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
404
405 "STR_TO_MAP" => {
408 if f.args.len() == 1 {
409 let mut args = f.args;
410 args.push(Expression::Literal(Box::new(
411 crate::expressions::Literal::String(",".to_string()),
412 )));
413 args.push(Expression::Literal(Box::new(
414 crate::expressions::Literal::String(":".to_string()),
415 )));
416 Ok(Expression::Function(Box::new(Function::new(
417 "STR_TO_MAP".to_string(),
418 args,
419 ))))
420 } else {
421 Ok(Expression::Function(Box::new(f)))
422 }
423 }
424
425 "POSITION" => Ok(Expression::Function(Box::new(f))),
427
428 "LOCATE" => Ok(Expression::Function(Box::new(f))),
430
431 "STRPOS" if f.args.len() == 2 => {
433 let mut args = f.args;
434 let first = args.remove(0);
435 let second = args.remove(0);
436 Ok(Expression::Function(Box::new(Function::new(
438 "LOCATE".to_string(),
439 vec![second, first],
440 ))))
441 }
442
443 "CHARINDEX" if f.args.len() >= 2 => {
445 let mut args = f.args;
446 let substring = args.remove(0);
447 let string = args.remove(0);
448 let mut locate_args = vec![substring, string];
449 if !args.is_empty() {
450 locate_args.push(args.remove(0));
451 }
452 Ok(Expression::Function(Box::new(Function::new(
453 "LOCATE".to_string(),
454 locate_args,
455 ))))
456 }
457
458 "INSTR" => Ok(Expression::Function(Box::new(f))),
460
461 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
463 this: f.args.into_iter().next().unwrap(),
464 decimals: None,
465 to: None,
466 }))),
467
468 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
470 this: f.args.into_iter().next().unwrap(),
471 decimals: None,
472 to: None,
473 }))),
474
475 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
477 "EXPLODE".to_string(),
478 f.args,
479 )))),
480
481 "FLATTEN" => Ok(Expression::Function(Box::new(f))),
483
484 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
486 "COLLECT_LIST".to_string(),
487 f.args,
488 )))),
489
490 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
492
493 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
495
496 "ARRAY_LENGTH" | "CARDINALITY" => Ok(Expression::Function(Box::new(Function::new(
498 "SIZE".to_string(),
499 f.args,
500 )))),
501
502 "SIZE" => Ok(Expression::Function(Box::new(f))),
504
505 "SPLIT" => Ok(Expression::Function(Box::new(f))),
507
508 "REGEXP_REPLACE" if f.args.len() > 4 => {
511 let mut args = f.args;
512 args.truncate(4);
513 Ok(Expression::Function(Box::new(Function::new(
514 "REGEXP_REPLACE".to_string(),
515 args,
516 ))))
517 }
518 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
519
520 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
522
523 "REGEXP_EXTRACT_ALL" => Ok(Expression::Function(Box::new(f))),
525
526 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
528 "RLIKE".to_string(),
529 f.args,
530 )))),
531
532 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
534 "GET_JSON_OBJECT".to_string(),
535 f.args,
536 )))),
537
538 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
540 "GET_JSON_OBJECT".to_string(),
541 f.args,
542 )))),
543
544 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
546
547 "FROM_JSON" => Ok(Expression::Function(Box::new(f))),
549
550 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
552
553 "PARSE_JSON" if f.args.len() == 1 => Ok(f.args.into_iter().next().unwrap()),
555 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
556 "FROM_JSON".to_string(),
557 f.args,
558 )))),
559
560 "DATEDIFF" | "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
562 "DATEDIFF".to_string(),
563 f.args,
564 )))),
565
566 "DATE_ADD" | "DATEADD" => Ok(Expression::Function(Box::new(Function::new(
568 "DATE_ADD".to_string(),
569 f.args,
570 )))),
571
572 "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
574
575 "TIMESTAMPADD" => Ok(Expression::Function(Box::new(f))),
577
578 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(f))),
580
581 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
583
584 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
586
587 "NVL" => Ok(Expression::Function(Box::new(f))),
589
590 "NVL2" => Ok(Expression::Function(Box::new(f))),
592
593 "MAP" => Ok(Expression::Function(Box::new(f))),
595
596 "ARRAY" => Ok(Expression::Function(Box::new(f))),
598
599 "ROW" => Ok(Expression::Function(Box::new(Function::new(
601 "STRUCT".to_string(),
602 f.args,
603 )))),
604
605 "STRUCT" => {
607 let mut col_idx = 1usize;
608 let named_args: Vec<Expression> = f
609 .args
610 .into_iter()
611 .map(|arg| {
612 let current_idx = col_idx;
613 col_idx += 1;
614 match &arg {
616 Expression::Alias(_) => arg, Expression::Star(_) => arg, Expression::Column(c) if c.table.is_none() => {
619 let name = c.name.name.clone();
621 Expression::Alias(Box::new(crate::expressions::Alias {
622 this: arg,
623 alias: crate::expressions::Identifier::new(&name),
624 column_aliases: Vec::new(),
625 pre_alias_comments: Vec::new(),
626 trailing_comments: Vec::new(),
627 inferred_type: None,
628 }))
629 }
630 _ => {
631 let name = format!("col{}", current_idx);
633 Expression::Alias(Box::new(crate::expressions::Alias {
634 this: arg,
635 alias: crate::expressions::Identifier::new(&name),
636 column_aliases: Vec::new(),
637 pre_alias_comments: Vec::new(),
638 trailing_comments: Vec::new(),
639 inferred_type: None,
640 }))
641 }
642 }
643 })
644 .collect();
645 Ok(Expression::Function(Box::new(Function {
646 name: "STRUCT".to_string(),
647 args: named_args,
648 distinct: false,
649 trailing_comments: Vec::new(),
650 use_bracket_syntax: false,
651 no_parens: false,
652 quoted: false,
653 span: None,
654 inferred_type: None,
655 })))
656 }
657
658 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
660
661 "MAP_FROM_ARRAYS" => Ok(Expression::Function(Box::new(f))),
663
664 "ARRAY_SORT" => Ok(Expression::Function(Box::new(f))),
666
667 "ARRAY_DISTINCT" => Ok(Expression::Function(Box::new(f))),
669
670 "ARRAY_UNION" => Ok(Expression::Function(Box::new(f))),
672
673 "ARRAY_INTERSECT" => Ok(Expression::Function(Box::new(f))),
675
676 "ARRAY_EXCEPT" => Ok(Expression::Function(Box::new(f))),
678
679 "ARRAY_CONTAINS" => Ok(Expression::Function(Box::new(f))),
681
682 "ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
684
685 "TRY_ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
687
688 "TRANSFORM" => Ok(Expression::Function(Box::new(f))),
690
691 "FILTER" => Ok(Expression::Function(Box::new(f))),
693
694 "AGGREGATE" => Ok(Expression::Function(Box::new(f))),
696
697 "SEQUENCE" => Ok(Expression::Function(Box::new(f))),
699
700 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
702 "SEQUENCE".to_string(),
703 f.args,
704 )))),
705
706 "STARTSWITH" | "STARTS_WITH" => Ok(Expression::Function(Box::new(Function::new(
708 "STARTSWITH".to_string(),
709 f.args,
710 )))),
711
712 "ENDSWITH" | "ENDS_WITH" => Ok(Expression::Function(Box::new(Function::new(
714 "ENDSWITH".to_string(),
715 f.args,
716 )))),
717
718 "ARRAY_CONSTRUCT_COMPACT" => {
720 let inner =
721 Expression::Function(Box::new(Function::new("ARRAY".to_string(), f.args)));
722 Ok(Expression::Function(Box::new(Function::new(
723 "ARRAY_COMPACT".to_string(),
724 vec![inner],
725 ))))
726 }
727
728 "ARRAY_TO_STRING" => Ok(Expression::Function(Box::new(Function::new(
730 "ARRAY_JOIN".to_string(),
731 f.args,
732 )))),
733
734 "TO_ARRAY" if f.args.len() == 1 => {
736 let x = f.args[0].clone();
737 match &x {
740 Expression::ArrayFunc(arr) => {
741 Ok(Expression::Function(Box::new(Function::new(
743 "ARRAY".to_string(),
744 arr.expressions.clone(),
745 ))))
746 }
747 _ => Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
748 condition: Expression::IsNull(Box::new(crate::expressions::IsNull {
749 this: x.clone(),
750 not: false,
751 postfix_form: false,
752 })),
753 true_value: Expression::Null(crate::expressions::Null),
754 false_value: Some(Expression::Function(Box::new(Function::new(
755 "ARRAY".to_string(),
756 vec![x],
757 )))),
758 original_name: Some("IF".to_string()),
759 inferred_type: None,
760 }))),
761 }
762 }
763
764 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
766 let subject = f.args[0].clone();
767 let pattern = f.args[1].clone();
768 let group = if f.args.len() >= 6 {
771 let g = &f.args[5];
772 if matches!(g, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"))
774 {
775 None
776 } else {
777 Some(g.clone())
778 }
779 } else {
780 None
781 };
782 let mut args = vec![subject, pattern];
783 if let Some(g) = group {
784 args.push(g);
785 }
786 Ok(Expression::Function(Box::new(Function::new(
787 "REGEXP_EXTRACT".to_string(),
788 args,
789 ))))
790 }
791
792 "UUID_STRING" => Ok(Expression::Function(Box::new(Function::new(
794 "UUID".to_string(),
795 vec![],
796 )))),
797
798 "OBJECT_CONSTRUCT" if f.args.len() >= 2 && f.args.len() % 2 == 0 => {
800 let mut struct_args = Vec::new();
804 for pair in f.args.chunks(2) {
805 if let Expression::Literal(lit) = &pair[0] {
806 if let Literal::String(key) = lit.as_ref() {
807 struct_args.push(Expression::Alias(Box::new(
808 crate::expressions::Alias {
809 this: pair[1].clone(),
810 alias: crate::expressions::Identifier::new(key.clone()),
811 column_aliases: vec![],
812 pre_alias_comments: vec![],
813 trailing_comments: vec![],
814 inferred_type: None,
815 },
816 )));
817 }
818 } else {
819 struct_args.push(pair[1].clone());
820 }
821 }
822 Ok(Expression::Function(Box::new(Function::new(
823 "STRUCT".to_string(),
824 struct_args,
825 ))))
826 }
827
828 "DATE_PART" if f.args.len() == 2 => {
830 let mut args = f.args;
831 let part = args.remove(0);
832 let expr = args.remove(0);
833 if let Some(field) = expr_to_datetime_field(&part) {
834 Ok(Expression::Extract(Box::new(ExtractFunc {
835 this: expr,
836 field,
837 })))
838 } else {
839 Ok(Expression::Function(Box::new(Function::new(
841 "DATE_PART".to_string(),
842 vec![part, expr],
843 ))))
844 }
845 }
846
847 "GET_PATH" if f.args.len() == 2 => {
849 let mut args = f.args;
850 let this = args.remove(0);
851 let path = args.remove(0);
852 let json_path = match &path {
853 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
854 let Literal::String(s) = lit.as_ref() else {
855 unreachable!()
856 };
857 let normalized = if s.starts_with('$') {
858 s.clone()
859 } else if s.starts_with('[') {
860 format!("${}", s)
861 } else {
862 format!("$.{}", s)
863 };
864 Expression::Literal(Box::new(Literal::String(normalized)))
865 }
866 _ => path,
867 };
868 Ok(Expression::Function(Box::new(Function::new(
869 "GET_JSON_OBJECT".to_string(),
870 vec![this, json_path],
871 ))))
872 }
873
874 "BITWISE_LEFT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
876 "SHIFTLEFT".to_string(),
877 f.args,
878 )))),
879
880 "BITWISE_RIGHT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
882 "SHIFTRIGHT".to_string(),
883 f.args,
884 )))),
885
886 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
888 "APPROX_COUNT_DISTINCT".to_string(),
889 f.args,
890 )))),
891
892 "ARRAY_SLICE" => Ok(Expression::Function(Box::new(Function::new(
894 "SLICE".to_string(),
895 f.args,
896 )))),
897
898 "DATE_FROM_PARTS" => Ok(Expression::Function(Box::new(Function::new(
900 "MAKE_DATE".to_string(),
901 f.args,
902 )))),
903
904 "DAYOFWEEK_ISO" => Ok(Expression::Function(Box::new(Function::new(
906 "DAYOFWEEK".to_string(),
907 f.args,
908 )))),
909
910 "FORMAT" => Ok(Expression::Function(Box::new(Function::new(
912 "FORMAT_STRING".to_string(),
913 f.args,
914 )))),
915
916 "LOGICAL_AND" => Ok(Expression::Function(Box::new(Function::new(
918 "BOOL_AND".to_string(),
919 f.args,
920 )))),
921
922 "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
924 "VAR_POP".to_string(),
925 f.args,
926 )))),
927
928 "WEEK_OF_YEAR" => Ok(Expression::Function(Box::new(Function::new(
930 "WEEKOFYEAR".to_string(),
931 f.args,
932 )))),
933
934 "BIT_GET" => Ok(Expression::Function(Box::new(Function::new(
936 "GETBIT".to_string(),
937 f.args,
938 )))),
939
940 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
942
943 _ => Ok(Expression::Function(Box::new(f))),
945 }
946 }
947
948 fn transform_aggregate_function(
949 &self,
950 f: Box<crate::expressions::AggregateFunction>,
951 ) -> Result<Expression> {
952 let name_upper = f.name.to_uppercase();
953 match name_upper.as_str() {
954 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
956 Function::new("COLLECT_LIST".to_string(), f.args),
957 ))),
958
959 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
961 Function::new("COLLECT_LIST".to_string(), f.args),
962 ))),
963
964 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
966 "COLLECT_LIST".to_string(),
967 f.args,
968 )))),
969
970 "ARRAY_AGG" if !f.args.is_empty() => {
972 let mut af = f;
973 af.name = "COLLECT_LIST".to_string();
974 Ok(Expression::AggregateFunction(af))
975 }
976
977 "LOGICAL_OR" if !f.args.is_empty() => {
979 let mut af = f;
980 af.name = "BOOL_OR".to_string();
981 Ok(Expression::AggregateFunction(af))
982 }
983
984 _ => Ok(Expression::AggregateFunction(f)),
986 }
987 }
988}
989
990fn expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
992 let name = match expr {
993 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
994 let Literal::String(s) = lit.as_ref() else {
995 unreachable!()
996 };
997 s.to_uppercase()
998 }
999 Expression::Identifier(id) => id.name.to_uppercase(),
1000 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1001 _ => return None,
1002 };
1003 match name.as_str() {
1004 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
1005 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
1006 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
1007 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
1008 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
1009 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
1010 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
1011 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
1012 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
1013 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
1014 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
1015 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
1016 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
1017 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
1018 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
1019 _ => Some(DateTimeField::Custom(name)),
1020 }
1021}