1use super::{DialectImpl, DialectType};
15use crate::error::Result;
16use crate::expressions::{
17 CeilFunc, CurrentTimestamp, DataType, DateTimeField, Expression, ExtractFunc, Function,
18 Literal, StructField, UnaryFunc, VarArgFunc,
19};
20#[cfg(feature = "generate")]
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
24pub struct SparkDialect;
26
27impl DialectImpl for SparkDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Spark
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 let mut config = TokenizerConfig::default();
34 config.identifiers.clear();
36 config.identifiers.insert('`', '`');
37 config.quotes.insert("\"".to_string(), "\"".to_string());
39 config.string_escapes.push('\\');
41 config
43 .keywords
44 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
45 config
46 .keywords
47 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
48 config
49 .keywords
50 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
51 config
53 .numeric_literals
54 .insert("L".to_string(), "BIGINT".to_string());
55 config
56 .numeric_literals
57 .insert("S".to_string(), "SMALLINT".to_string());
58 config
59 .numeric_literals
60 .insert("Y".to_string(), "TINYINT".to_string());
61 config
62 .numeric_literals
63 .insert("D".to_string(), "DOUBLE".to_string());
64 config
65 .numeric_literals
66 .insert("F".to_string(), "FLOAT".to_string());
67 config
68 .numeric_literals
69 .insert("BD".to_string(), "DECIMAL".to_string());
70 config.identifiers_can_start_with_digit = true;
72 config.string_escapes_allowed_in_raw_strings = false;
75 config
76 }
77
78 #[cfg(feature = "generate")]
79
80 fn generator_config(&self) -> GeneratorConfig {
81 use crate::generator::IdentifierQuoteStyle;
82 GeneratorConfig {
83 identifier_quote: '`',
84 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
85 dialect: Some(DialectType::Spark),
86 struct_field_sep: ": ",
88 create_function_return_as: false,
90 alias_post_tablesample: true,
92 tablesample_seed_keyword: "REPEATABLE",
93 join_hints: false,
94 identifiers_can_start_with_digit: true,
95 schema_comment_with_eq: false,
97 ..Default::default()
98 }
99 }
100
101 #[cfg(feature = "transpile")]
102
103 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
104 match expr {
105 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
107 original_name: None,
108 expressions: vec![f.this, f.expression],
109 inferred_type: None,
110 }))),
111
112 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
114 original_name: None,
115 expressions: vec![f.this, f.expression],
116 inferred_type: None,
117 }))),
118
119 Expression::Cast(mut c) => {
121 c.to = Self::normalize_spark_type(c.to);
122 Ok(Expression::Cast(c))
123 }
124
125 Expression::TryCast(mut c) => {
127 c.to = Self::normalize_spark_type(c.to);
128 Ok(Expression::TryCast(c))
129 }
130
131 Expression::SafeCast(mut c) => {
133 c.to = Self::normalize_spark_type(c.to);
134 Ok(Expression::TryCast(c))
135 }
136
137 Expression::Trim(mut t) => {
140 if !t.sql_standard_syntax && t.characters.is_some() {
141 t.sql_standard_syntax = true;
144 }
145 Ok(Expression::Trim(t))
146 }
147
148 Expression::ILike(op) => Ok(Expression::ILike(op)),
150
151 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
153
154 Expression::Explode(f) => Ok(Expression::Explode(f)),
156
157 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
159
160 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
162 seed: None,
163 lower: None,
164 upper: None,
165 }))),
166
167 Expression::Rand(r) => Ok(Expression::Rand(r)),
169
170 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
172 "CONCAT".to_string(),
173 vec![op.left, op.right],
174 )))),
175
176 Expression::Function(f) => self.transform_function(*f),
180
181 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
183
184 Expression::Parameter(mut p)
186 if p.style == crate::expressions::ParameterStyle::Dollar =>
187 {
188 p.style = crate::expressions::ParameterStyle::DollarBrace;
189 if let Some(idx) = p.index {
191 p.name = Some(idx.to_string());
192 }
193 Ok(Expression::Parameter(p))
194 }
195
196 Expression::JSONExtract(je) if je.variant_extract.is_some() => {
198 let path = match *je.expression {
200 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
201 let Literal::String(s) = lit.as_ref() else {
202 unreachable!()
203 };
204 Expression::Literal(Box::new(Literal::String(format!("$.{}", s))))
205 }
206 other => other,
207 };
208 Ok(Expression::Function(Box::new(Function::new(
209 "GET_JSON_OBJECT".to_string(),
210 vec![*je.this, path],
211 ))))
212 }
213
214 _ => Ok(expr),
216 }
217 }
218}
219
#[cfg(feature = "transpile")]
impl SparkDialect {
    /// Maps a cast target type onto the type emitted for Spark.
    ///
    /// `VARCHAR`/`CHAR` without a length and `TEXT` become the custom type
    /// `STRING`. Struct fields are normalized recursively. Every other type
    /// (including `VARCHAR`/`CHAR` with a length) is returned unchanged.
    fn normalize_spark_type(dt: DataType) -> DataType {
        match dt {
            DataType::VarChar { length: None, .. }
            | DataType::Char { length: None }
            | DataType::Text => DataType::Custom {
                name: "STRING".to_string(),
            },
            DataType::Struct { fields, nested } => DataType::Struct {
                fields: fields
                    .into_iter()
                    .map(|mut field| {
                        field.data_type = Self::normalize_spark_type(field.data_type);
                        field
                    })
                    .collect::<Vec<StructField>>(),
                nested,
            },
            other => other,
        }
    }

    /// Builds a plain function call `name(args...)` through [`Function::new`].
    fn func_call(name: &str, args: Vec<Expression>) -> Expression {
        Expression::Function(Box::new(Function::new(name.to_string(), args)))
    }

    /// Builds a `COALESCE` node over `args`.
    fn coalesce_of(args: Vec<Expression>) -> Expression {
        Expression::Coalesce(Box::new(VarArgFunc {
            original_name: None,
            expressions: args,
            inferred_type: None,
        }))
    }

    /// Builds an alias node naming `this` as `name`, with no comments, no
    /// column aliases and no explicit `AS` keyword recorded.
    fn aliased(this: Expression, name: String) -> Expression {
        Expression::Alias(Box::new(crate::expressions::Alias {
            this,
            alias: crate::expressions::Identifier::new(name),
            column_aliases: Vec::new(),
            alias_explicit_as: false,
            alias_keyword: None,
            pre_alias_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }))
    }

    /// Returns the text of `expr` when it is a string literal, `None` otherwise.
    fn string_literal_text(expr: &Expression) -> Option<&str> {
        match expr {
            Expression::Literal(lit) => match lit.as_ref() {
                Literal::String(text) => Some(text.as_str()),
                _ => None,
            },
            _ => None,
        }
    }

    /// Takes the first argument out of `args`.
    ///
    /// Callers only use this behind a guard that checks the argument count.
    fn first_arg(args: Vec<Expression>) -> Expression {
        args.into_iter()
            .next()
            .expect("caller guarantees at least one argument")
    }

    /// Builds a `CURRENT_TIMESTAMP` node without precision.
    fn current_timestamp() -> Expression {
        Expression::CurrentTimestamp(CurrentTimestamp {
            precision: None,
            sysdate: false,
        })
    }

    /// Rewrites a generic function call, selected by its upper-cased name.
    ///
    /// Most rules are renames; a few reorder, trim or extend the argument
    /// list or replace the call with a dedicated expression node. A call
    /// that matches no rule (or whose argument count does not satisfy the
    /// rule's guard) is returned unchanged.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        let name_upper = f.name.to_uppercase();
        let rewritten = match name_upper.as_str() {
            // ---- Null handling -------------------------------------------------
            "IFNULL" | "NVL" | "ISNULL" if f.args.len() == 2 => Self::coalesce_of(f.args),

            // ---- Collection-building aggregates written as plain calls ----------
            "GROUP_CONCAT" | "STRING_AGG" | "LISTAGG" if !f.args.is_empty() => {
                Self::func_call("COLLECT_LIST", f.args)
            }
            "ARRAY_AGG" => Self::func_call("COLLECT_LIST", f.args),

            // ---- Strings ---------------------------------------------------------
            "LEN" if f.args.len() == 1 => {
                Expression::Length(Box::new(UnaryFunc::new(Self::first_arg(f.args))))
            }
            // STRPOS(string, substring) -> LOCATE(substring, string).
            "STRPOS" if f.args.len() == 2 => {
                let mut args = f.args;
                args.swap(0, 1);
                Self::func_call("LOCATE", args)
            }
            // CHARINDEX keeps its first two arguments plus an optional third.
            "CHARINDEX" if f.args.len() >= 2 => {
                let mut args = f.args;
                args.truncate(3);
                Self::func_call("LOCATE", args)
            }
            "STARTSWITH" | "STARTS_WITH" => Self::func_call("STARTSWITH", f.args),
            "ENDSWITH" | "ENDS_WITH" => Self::func_call("ENDSWITH", f.args),
            "FORMAT" => Self::func_call("FORMAT_STRING", f.args),
            // A single-argument STR_TO_MAP gets "," and ":" appended as
            // explicit delimiters.
            "STR_TO_MAP" if f.args.len() == 1 => {
                let mut args = f.args;
                args.extend([",", ":"].into_iter().map(|delimiter| {
                    Expression::Literal(Box::new(Literal::String(delimiter.to_string())))
                }));
                Self::func_call("STR_TO_MAP", args)
            }

            // ---- Random numbers --------------------------------------------------
            // NOTE(review): any arguments (e.g. a seed) are discarded here —
            // confirm this is intended.
            "RANDOM" | "RAND" => Expression::Rand(Box::new(crate::expressions::Rand {
                seed: None,
                lower: None,
                upper: None,
            })),

            // ---- Current date / time ---------------------------------------------
            "NOW" | "GETDATE" | "CURRENT_TIMESTAMP" => Self::current_timestamp(),
            "CURRENT_DATE" | "CURDATE" => Expression::CurrentDate(crate::expressions::CurrentDate),
            "UNIX_TIMESTAMP" if f.args.is_empty() => {
                Self::func_call("UNIX_TIMESTAMP", vec![Self::current_timestamp()])
            }

            // ---- Date / time conversion and arithmetic ---------------------------
            // TO_DATE(x, 'yyyy-MM-dd') drops the format argument.
            "TO_DATE"
                if f.args.len() == 2
                    && Self::string_literal_text(&f.args[1]) == Some("yyyy-MM-dd") =>
            {
                let mut args = f.args;
                args.truncate(1);
                Self::func_call("TO_DATE", args)
            }
            "STRFTIME" | "TO_CHAR" => Self::func_call("DATE_FORMAT", f.args),
            "DATEPART" => Self::func_call("EXTRACT", f.args),
            "DATEDIFF" | "DATE_DIFF" => Self::func_call("DATEDIFF", f.args),
            "DATE_ADD" | "DATEADD" => Self::func_call("DATE_ADD", f.args),
            "DATE_FROM_PARTS" => Self::func_call("MAKE_DATE", f.args),
            "DAYOFWEEK_ISO" => Self::func_call("DAYOFWEEK", f.args),
            "WEEK_OF_YEAR" => Self::func_call("WEEKOFYEAR", f.args),
            // DATE_PART(part, expr) becomes an EXTRACT node when `part` can be
            // read as a field name; otherwise the call is rebuilt as is.
            "DATE_PART" if f.args.len() == 2 => {
                let mut args = f.args;
                let expr = args.pop().expect("guard guarantees two arguments");
                let part = args.pop().expect("guard guarantees two arguments");
                match expr_to_datetime_field(&part) {
                    Some(field) => Expression::Extract(Box::new(ExtractFunc { this: expr, field })),
                    None => Self::func_call("DATE_PART", vec![part, expr]),
                }
            }

            // ---- Numeric ---------------------------------------------------------
            "CEILING" | "CEIL" if f.args.len() == 1 => Expression::Ceil(Box::new(CeilFunc {
                this: Self::first_arg(f.args),
                decimals: None,
                to: None,
            })),
            "BITWISE_LEFT_SHIFT" => Self::func_call("SHIFTLEFT", f.args),
            "BITWISE_RIGHT_SHIFT" => Self::func_call("SHIFTRIGHT", f.args),
            "BIT_GET" => Self::func_call("GETBIT", f.args),

            // ---- Aggregates with different names ---------------------------------
            "APPROX_DISTINCT" => Self::func_call("APPROX_COUNT_DISTINCT", f.args),
            "LOGICAL_AND" => Self::func_call("BOOL_AND", f.args),
            "VARIANCE_POP" => Self::func_call("VAR_POP", f.args),

            // ---- Arrays ----------------------------------------------------------
            "UNNEST" => Self::func_call("EXPLODE", f.args),
            "ARRAY_LENGTH" | "CARDINALITY" => Self::func_call("SIZE", f.args),
            "ARRAY_SLICE" => Self::func_call("SLICE", f.args),
            "ARRAY_TO_STRING" => Self::func_call("ARRAY_JOIN", f.args),
            "GENERATE_SERIES" => Self::func_call("SEQUENCE", f.args),
            "ARRAY_CONSTRUCT_COMPACT" => {
                let inner = Self::func_call("ARRAY", f.args);
                Self::func_call("ARRAY_COMPACT", vec![inner])
            }
            // TO_ARRAY(x): an array constructor is re-emitted as ARRAY(...);
            // anything else becomes IF(x IS NULL, NULL, ARRAY(x)).
            "TO_ARRAY" if f.args.len() == 1 => {
                let x = Self::first_arg(f.args);
                if let Expression::ArrayFunc(arr) = &x {
                    Self::func_call("ARRAY", arr.expressions.clone())
                } else {
                    Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: x.clone(),
                            not: false,
                            postfix_form: false,
                        })),
                        true_value: Expression::Null(crate::expressions::Null),
                        false_value: Some(Self::func_call("ARRAY", vec![x])),
                        original_name: Some("IF".to_string()),
                        inferred_type: None,
                    }))
                }
            }

            // ---- Regular expressions ---------------------------------------------
            // Arguments beyond the fourth are dropped.
            "REGEXP_REPLACE" if f.args.len() > 4 => {
                let mut args = f.args;
                args.truncate(4);
                Self::func_call("REGEXP_REPLACE", args)
            }
            "RLIKE" | "REGEXP_LIKE" => Self::func_call("RLIKE", f.args),
            // REGEXP_SUBSTR keeps subject and pattern; the sixth argument (the
            // group) is carried over unless it is the number literal 1.
            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
                let mut args = f.args;
                let group = if args.len() >= 6 {
                    Some(args.swap_remove(5))
                } else {
                    None
                };
                args.truncate(2);
                let is_literal_one = |g: &Expression| {
                    matches!(
                        g,
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(n) if n == "1")
                    )
                };
                if let Some(g) = group {
                    if !is_literal_one(&g) {
                        args.push(g);
                    }
                }
                Self::func_call("REGEXP_EXTRACT", args)
            }

            // ---- JSON ------------------------------------------------------------
            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" => Self::func_call("GET_JSON_OBJECT", f.args),
            // PARSE_JSON(x) collapses to x; other arities become FROM_JSON.
            "PARSE_JSON" if f.args.len() == 1 => Self::first_arg(f.args),
            "PARSE_JSON" => Self::func_call("FROM_JSON", f.args),
            // GET_PATH(obj, path): a string literal path is turned into a
            // "$"-rooted path before being handed to GET_JSON_OBJECT.
            "GET_PATH" if f.args.len() == 2 => {
                let mut args = f.args;
                let path = args.pop().expect("guard guarantees two arguments");
                let this = args.pop().expect("guard guarantees two arguments");
                let rooted = Self::string_literal_text(&path).map(|s| {
                    if s.starts_with('$') {
                        s.to_string()
                    } else if s.starts_with('[') {
                        format!("${}", s)
                    } else {
                        format!("$.{}", s)
                    }
                });
                let json_path = match rooted {
                    Some(text) => Expression::Literal(Box::new(Literal::String(text))),
                    None => path,
                };
                Self::func_call("GET_JSON_OBJECT", vec![this, json_path])
            }

            // ---- Structs ---------------------------------------------------------
            "ROW" => Self::func_call("STRUCT", f.args),
            // Every STRUCT argument ends up named: aliases and stars are kept,
            // unqualified columns are aliased to their own name, and anything
            // else is aliased to `colN` (N = 1-based argument position).
            "STRUCT" => {
                let named_args: Vec<Expression> = f
                    .args
                    .into_iter()
                    .enumerate()
                    .map(|(position, arg)| {
                        let alias_name = match &arg {
                            Expression::Alias(_) | Expression::Star(_) => None,
                            Expression::Column(c) if c.table.is_none() => {
                                Some(c.name.name.clone())
                            }
                            _ => Some(format!("col{}", position + 1)),
                        };
                        match alias_name {
                            Some(name) => Self::aliased(arg, name),
                            None => arg,
                        }
                    })
                    .collect();
                Expression::Function(Box::new(Function {
                    name: "STRUCT".to_string(),
                    args: named_args,
                    distinct: false,
                    trailing_comments: Vec::new(),
                    use_bracket_syntax: false,
                    no_parens: false,
                    quoted: false,
                    span: None,
                    inferred_type: None,
                }))
            }
            // NAMED_STRUCT('a', x, 'b', y) -> STRUCT(x AS a, y AS b). If any
            // key is not a string literal the call is rebuilt with its
            // original arguments instead.
            "NAMED_STRUCT" if f.args.len() % 2 == 0 => {
                let struct_args: Option<Vec<Expression>> = f
                    .args
                    .chunks_exact(2)
                    .map(|pair| {
                        Self::string_literal_text(&pair[0])
                            .map(|key| Self::aliased(pair[1].clone(), key.to_string()))
                    })
                    .collect();
                match struct_args {
                    Some(args) => Self::func_call("STRUCT", args),
                    None => Self::func_call("NAMED_STRUCT", f.args),
                }
            }
            // OBJECT_CONSTRUCT('a', x, ...) -> STRUCT(x AS a, ...).
            "OBJECT_CONSTRUCT" if f.args.len() >= 2 && f.args.len() % 2 == 0 => {
                let mut struct_args = Vec::with_capacity(f.args.len() / 2);
                let mut remaining = f.args.into_iter();
                while let (Some(key), Some(value)) = (remaining.next(), remaining.next()) {
                    let field = match Self::string_literal_text(&key) {
                        Some(name) => Self::aliased(value, name.to_string()),
                        // A key that is not a string literal (a number
                        // literal, a column, ...) cannot name the field; the
                        // value is kept positionally so no pair is lost.
                        None => value,
                    };
                    struct_args.push(field);
                }
                Self::func_call("STRUCT", struct_args)
            }

            // ---- Misc ------------------------------------------------------------
            // UUID_STRING() -> UUID(); with arguments the call is kept.
            "UUID_STRING" => {
                let target = if f.args.is_empty() {
                    "UUID"
                } else {
                    "UUID_STRING"
                };
                Self::func_call(target, f.args)
            }

            // Everything else is returned exactly as it came in.
            _ => Expression::Function(Box::new(f)),
        };
        Ok(rewritten)
    }

    /// Rewrites an aggregate function call, selected by its upper-cased name.
    ///
    /// `GROUP_CONCAT`, `STRING_AGG` and `LISTAGG` become a plain
    /// `COLLECT_LIST` call built from the arguments only. `ARRAY_AGG` and
    /// `LOGICAL_OR` keep the aggregate node and are only renamed. All rules
    /// require at least one argument; anything else is returned unchanged.
    fn transform_aggregate_function(
        &self,
        f: Box<crate::expressions::AggregateFunction>,
    ) -> Result<Expression> {
        let name_upper = f.name.to_uppercase();
        let has_args = !f.args.is_empty();
        let rewritten = match name_upper.as_str() {
            "GROUP_CONCAT" | "STRING_AGG" | "LISTAGG" if has_args => {
                Self::func_call("COLLECT_LIST", f.args)
            }
            "ARRAY_AGG" if has_args => {
                let mut renamed = f;
                renamed.name = "COLLECT_LIST".to_string();
                Expression::AggregateFunction(renamed)
            }
            "LOGICAL_OR" if has_args => {
                let mut renamed = f;
                renamed.name = "BOOL_OR".to_string();
                Expression::AggregateFunction(renamed)
            }
            _ => Expression::AggregateFunction(f),
        };
        Ok(rewritten)
    }
}
1049
1050fn expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
1052 let name = match expr {
1053 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1054 let Literal::String(s) = lit.as_ref() else {
1055 unreachable!()
1056 };
1057 s.to_uppercase()
1058 }
1059 Expression::Identifier(id) => id.name.to_uppercase(),
1060 Expression::Var(v) => v.this.to_uppercase(),
1061 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1062 _ => return None,
1063 };
1064 match name.as_str() {
1065 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
1066 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
1067 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
1068 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
1069 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
1070 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
1071 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
1072 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
1073 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
1074 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
1075 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
1076 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
1077 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
1078 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
1079 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
1080 _ => Some(DateTimeField::Custom(name)),
1081 }
1082}