1use super::{DialectImpl, DialectType};
15use crate::error::Result;
16use crate::expressions::{
17 CeilFunc, CurrentTimestamp, DataType, DateTimeField, Expression, ExtractFunc, Function,
18 Literal, StructField, UnaryFunc, VarArgFunc,
19};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Marker type for the Spark dialect.
///
/// The struct carries no state. Its behavior comes from the `DialectImpl`
/// implementation (tokenizer/generator configuration and expression rewriting)
/// and from the private helper methods defined on it in this file.
pub struct SparkDialect;
25
26impl DialectImpl for SparkDialect {
27 fn dialect_type(&self) -> DialectType {
28 DialectType::Spark
29 }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 let mut config = TokenizerConfig::default();
33 config.identifiers.clear();
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.string_escapes.push('\\');
40 config
42 .keywords
43 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
44 config
45 .keywords
46 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
47 config
48 .keywords
49 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
50 config
52 .numeric_literals
53 .insert("L".to_string(), "BIGINT".to_string());
54 config
55 .numeric_literals
56 .insert("S".to_string(), "SMALLINT".to_string());
57 config
58 .numeric_literals
59 .insert("Y".to_string(), "TINYINT".to_string());
60 config
61 .numeric_literals
62 .insert("D".to_string(), "DOUBLE".to_string());
63 config
64 .numeric_literals
65 .insert("F".to_string(), "FLOAT".to_string());
66 config
67 .numeric_literals
68 .insert("BD".to_string(), "DECIMAL".to_string());
69 config.identifiers_can_start_with_digit = true;
71 config.string_escapes_allowed_in_raw_strings = false;
74 config
75 }
76
77 fn generator_config(&self) -> GeneratorConfig {
78 use crate::generator::IdentifierQuoteStyle;
79 GeneratorConfig {
80 identifier_quote: '`',
81 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
82 dialect: Some(DialectType::Spark),
83 struct_field_sep: ": ",
85 create_function_return_as: false,
87 alias_post_tablesample: true,
89 tablesample_seed_keyword: "REPEATABLE",
90 join_hints: false,
91 identifiers_can_start_with_digit: true,
92 schema_comment_with_eq: false,
94 ..Default::default()
95 }
96 }
97
98 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
99 match expr {
100 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
102 original_name: None,
103 expressions: vec![f.this, f.expression],
104 inferred_type: None,
105 }))),
106
107 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
109 original_name: None,
110 expressions: vec![f.this, f.expression],
111 inferred_type: None,
112 }))),
113
114 Expression::Cast(mut c) => {
116 c.to = Self::normalize_spark_type(c.to);
117 Ok(Expression::Cast(c))
118 }
119
120 Expression::TryCast(mut c) => {
122 c.to = Self::normalize_spark_type(c.to);
123 Ok(Expression::TryCast(c))
124 }
125
126 Expression::SafeCast(mut c) => {
128 c.to = Self::normalize_spark_type(c.to);
129 Ok(Expression::TryCast(c))
130 }
131
132 Expression::Trim(mut t) => {
135 if !t.sql_standard_syntax && t.characters.is_some() {
136 t.sql_standard_syntax = true;
139 }
140 Ok(Expression::Trim(t))
141 }
142
143 Expression::ILike(op) => Ok(Expression::ILike(op)),
145
146 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
148
149 Expression::Explode(f) => Ok(Expression::Explode(f)),
151
152 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
154
155 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
157 seed: None,
158 lower: None,
159 upper: None,
160 }))),
161
162 Expression::Rand(r) => Ok(Expression::Rand(r)),
164
165 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
167 "CONCAT".to_string(),
168 vec![op.left, op.right],
169 )))),
170
171 Expression::Function(f) => self.transform_function(*f),
175
176 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
178
179 Expression::Parameter(mut p)
181 if p.style == crate::expressions::ParameterStyle::Dollar =>
182 {
183 p.style = crate::expressions::ParameterStyle::DollarBrace;
184 if let Some(idx) = p.index {
186 p.name = Some(idx.to_string());
187 }
188 Ok(Expression::Parameter(p))
189 }
190
191 Expression::JSONExtract(je) if je.variant_extract.is_some() => {
193 let path = match *je.expression {
195 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
196 let Literal::String(s) = lit.as_ref() else {
197 unreachable!()
198 };
199 Expression::Literal(Box::new(Literal::String(format!("$.{}", s))))
200 }
201 other => other,
202 };
203 Ok(Expression::Function(Box::new(Function::new(
204 "GET_JSON_OBJECT".to_string(),
205 vec![*je.this, path],
206 ))))
207 }
208
209 _ => Ok(expr),
211 }
212 }
213}
214
215impl SparkDialect {
216 fn normalize_spark_type(dt: DataType) -> DataType {
221 match dt {
222 DataType::VarChar { length: None, .. }
223 | DataType::Char { length: None }
224 | DataType::Text => DataType::Custom {
225 name: "STRING".to_string(),
226 },
227 DataType::VarChar { .. } | DataType::Char { .. } => dt,
229 DataType::Struct { fields, nested } => {
231 let normalized_fields: Vec<StructField> = fields
232 .into_iter()
233 .map(|mut f| {
234 f.data_type = Self::normalize_spark_type(f.data_type);
235 f
236 })
237 .collect();
238 DataType::Struct {
239 fields: normalized_fields,
240 nested,
241 }
242 }
243 _ => dt,
244 }
245 }
246
247 fn transform_function(&self, f: Function) -> Result<Expression> {
248 let name_upper = f.name.to_uppercase();
249 match name_upper.as_str() {
250 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
252 original_name: None,
253 expressions: f.args,
254 inferred_type: None,
255 }))),
256
257 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
259 original_name: None,
260 expressions: f.args,
261 inferred_type: None,
262 }))),
263
264 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
266 original_name: None,
267 expressions: f.args,
268 inferred_type: None,
269 }))),
270
271 "GROUP_CONCAT" if !f.args.is_empty() => {
274 Ok(Expression::Function(Box::new(Function::new(
276 "COLLECT_LIST".to_string(),
277 f.args,
278 ))))
279 }
280
281 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
284 Function::new("COLLECT_LIST".to_string(), f.args),
285 ))),
286
287 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
289 "COLLECT_LIST".to_string(),
290 f.args,
291 )))),
292
293 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
295
296 "LENGTH" => Ok(Expression::Function(Box::new(f))),
298
299 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
301 f.args.into_iter().next().unwrap(),
302 )))),
303
304 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
306 seed: None,
307 lower: None,
308 upper: None,
309 }))),
310
311 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
313 seed: None,
314 lower: None,
315 upper: None,
316 }))),
317
318 "NOW" => Ok(Expression::CurrentTimestamp(
320 crate::expressions::CurrentTimestamp {
321 precision: None,
322 sysdate: false,
323 },
324 )),
325
326 "GETDATE" => Ok(Expression::CurrentTimestamp(
328 crate::expressions::CurrentTimestamp {
329 precision: None,
330 sysdate: false,
331 },
332 )),
333
334 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
336 crate::expressions::CurrentTimestamp {
337 precision: None,
338 sysdate: false,
339 },
340 )),
341
342 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
344
345 "TO_DATE" if f.args.len() == 2 => {
347 let is_default_format = matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(s) if s == "yyyy-MM-dd"));
348 if is_default_format {
349 Ok(Expression::Function(Box::new(Function::new(
350 "TO_DATE".to_string(),
351 vec![f.args.into_iter().next().unwrap()],
352 ))))
353 } else {
354 Ok(Expression::Function(Box::new(f)))
355 }
356 }
357 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
358
359 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
361
362 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
364
365 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
367 "DATE_FORMAT".to_string(),
368 f.args,
369 )))),
370
371 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
373 "DATE_FORMAT".to_string(),
374 f.args,
375 )))),
376
377 "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
379
380 "TRUNC" => Ok(Expression::Function(Box::new(f))),
382
383 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
385
386 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
388 "EXTRACT".to_string(),
389 f.args,
390 )))),
391
392 "UNIX_TIMESTAMP" => {
395 if f.args.is_empty() {
396 Ok(Expression::Function(Box::new(Function::new(
397 "UNIX_TIMESTAMP".to_string(),
398 vec![Expression::CurrentTimestamp(CurrentTimestamp {
399 precision: None,
400 sysdate: false,
401 })],
402 ))))
403 } else {
404 Ok(Expression::Function(Box::new(f)))
405 }
406 }
407
408 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
410
411 "STR_TO_MAP" => {
414 if f.args.len() == 1 {
415 let mut args = f.args;
416 args.push(Expression::Literal(Box::new(
417 crate::expressions::Literal::String(",".to_string()),
418 )));
419 args.push(Expression::Literal(Box::new(
420 crate::expressions::Literal::String(":".to_string()),
421 )));
422 Ok(Expression::Function(Box::new(Function::new(
423 "STR_TO_MAP".to_string(),
424 args,
425 ))))
426 } else {
427 Ok(Expression::Function(Box::new(f)))
428 }
429 }
430
431 "POSITION" => Ok(Expression::Function(Box::new(f))),
433
434 "LOCATE" => Ok(Expression::Function(Box::new(f))),
436
437 "STRPOS" if f.args.len() == 2 => {
439 let mut args = f.args;
440 let first = args.remove(0);
441 let second = args.remove(0);
442 Ok(Expression::Function(Box::new(Function::new(
444 "LOCATE".to_string(),
445 vec![second, first],
446 ))))
447 }
448
449 "CHARINDEX" if f.args.len() >= 2 => {
451 let mut args = f.args;
452 let substring = args.remove(0);
453 let string = args.remove(0);
454 let mut locate_args = vec![substring, string];
455 if !args.is_empty() {
456 locate_args.push(args.remove(0));
457 }
458 Ok(Expression::Function(Box::new(Function::new(
459 "LOCATE".to_string(),
460 locate_args,
461 ))))
462 }
463
464 "INSTR" => Ok(Expression::Function(Box::new(f))),
466
467 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
469 this: f.args.into_iter().next().unwrap(),
470 decimals: None,
471 to: None,
472 }))),
473
474 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
476 this: f.args.into_iter().next().unwrap(),
477 decimals: None,
478 to: None,
479 }))),
480
481 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
483 "EXPLODE".to_string(),
484 f.args,
485 )))),
486
487 "FLATTEN" => Ok(Expression::Function(Box::new(f))),
489
490 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
492 "COLLECT_LIST".to_string(),
493 f.args,
494 )))),
495
496 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
498
499 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
501
502 "ARRAY_LENGTH" | "CARDINALITY" => Ok(Expression::Function(Box::new(Function::new(
504 "SIZE".to_string(),
505 f.args,
506 )))),
507
508 "SIZE" => Ok(Expression::Function(Box::new(f))),
510
511 "SPLIT" => Ok(Expression::Function(Box::new(f))),
513
514 "REGEXP_REPLACE" if f.args.len() > 4 => {
517 let mut args = f.args;
518 args.truncate(4);
519 Ok(Expression::Function(Box::new(Function::new(
520 "REGEXP_REPLACE".to_string(),
521 args,
522 ))))
523 }
524 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
525
526 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
528
529 "REGEXP_EXTRACT_ALL" => Ok(Expression::Function(Box::new(f))),
531
532 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
534 "RLIKE".to_string(),
535 f.args,
536 )))),
537
538 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
540 "GET_JSON_OBJECT".to_string(),
541 f.args,
542 )))),
543
544 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
546 "GET_JSON_OBJECT".to_string(),
547 f.args,
548 )))),
549
550 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
552
553 "FROM_JSON" => Ok(Expression::Function(Box::new(f))),
555
556 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
558
559 "PARSE_JSON" if f.args.len() == 1 => Ok(f.args.into_iter().next().unwrap()),
561 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
562 "FROM_JSON".to_string(),
563 f.args,
564 )))),
565
566 "DATEDIFF" | "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
568 "DATEDIFF".to_string(),
569 f.args,
570 )))),
571
572 "DATE_ADD" | "DATEADD" => Ok(Expression::Function(Box::new(Function::new(
574 "DATE_ADD".to_string(),
575 f.args,
576 )))),
577
578 "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
580
581 "TIMESTAMPADD" => Ok(Expression::Function(Box::new(f))),
583
584 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(f))),
586
587 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
589
590 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
592
593 "NVL" => Ok(Expression::Function(Box::new(f))),
595
596 "NVL2" => Ok(Expression::Function(Box::new(f))),
598
599 "MAP" => Ok(Expression::Function(Box::new(f))),
601
602 "ARRAY" => Ok(Expression::Function(Box::new(f))),
604
605 "ROW" => Ok(Expression::Function(Box::new(Function::new(
607 "STRUCT".to_string(),
608 f.args,
609 )))),
610
611 "STRUCT" => {
613 let mut col_idx = 1usize;
614 let named_args: Vec<Expression> = f
615 .args
616 .into_iter()
617 .map(|arg| {
618 let current_idx = col_idx;
619 col_idx += 1;
620 match &arg {
622 Expression::Alias(_) => arg, Expression::Star(_) => arg, Expression::Column(c) if c.table.is_none() => {
625 let name = c.name.name.clone();
627 Expression::Alias(Box::new(crate::expressions::Alias {
628 this: arg,
629 alias: crate::expressions::Identifier::new(&name),
630 column_aliases: Vec::new(),
631 pre_alias_comments: Vec::new(),
632 trailing_comments: Vec::new(),
633 inferred_type: None,
634 }))
635 }
636 _ => {
637 let name = format!("col{}", current_idx);
639 Expression::Alias(Box::new(crate::expressions::Alias {
640 this: arg,
641 alias: crate::expressions::Identifier::new(&name),
642 column_aliases: Vec::new(),
643 pre_alias_comments: Vec::new(),
644 trailing_comments: Vec::new(),
645 inferred_type: None,
646 }))
647 }
648 }
649 })
650 .collect();
651 Ok(Expression::Function(Box::new(Function {
652 name: "STRUCT".to_string(),
653 args: named_args,
654 distinct: false,
655 trailing_comments: Vec::new(),
656 use_bracket_syntax: false,
657 no_parens: false,
658 quoted: false,
659 span: None,
660 inferred_type: None,
661 })))
662 }
663
664 "NAMED_STRUCT" if f.args.len() % 2 == 0 => {
666 let original_args = f.args.clone();
667 let mut struct_args = Vec::new();
668 for pair in f.args.chunks(2) {
669 if let Expression::Literal(lit) = &pair[0] {
670 if let Literal::String(field_name) = lit.as_ref() {
671 struct_args.push(Expression::Alias(Box::new(
672 crate::expressions::Alias {
673 this: pair[1].clone(),
674 alias: crate::expressions::Identifier::new(field_name),
675 column_aliases: Vec::new(),
676 pre_alias_comments: Vec::new(),
677 trailing_comments: Vec::new(),
678 inferred_type: None,
679 },
680 )));
681 continue;
682 }
683 }
684 return Ok(Expression::Function(Box::new(Function::new(
685 "NAMED_STRUCT".to_string(),
686 original_args,
687 ))));
688 }
689 Ok(Expression::Function(Box::new(Function::new(
690 "STRUCT".to_string(),
691 struct_args,
692 ))))
693 }
694
695 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
697
698 "MAP_FROM_ARRAYS" => Ok(Expression::Function(Box::new(f))),
700
701 "ARRAY_SORT" => Ok(Expression::Function(Box::new(f))),
703
704 "ARRAY_DISTINCT" => Ok(Expression::Function(Box::new(f))),
706
707 "ARRAY_UNION" => Ok(Expression::Function(Box::new(f))),
709
710 "ARRAY_INTERSECT" => Ok(Expression::Function(Box::new(f))),
712
713 "ARRAY_EXCEPT" => Ok(Expression::Function(Box::new(f))),
715
716 "ARRAY_CONTAINS" => Ok(Expression::Function(Box::new(f))),
718
719 "ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
721
722 "TRY_ELEMENT_AT" => Ok(Expression::Function(Box::new(f))),
724
725 "TRANSFORM" => Ok(Expression::Function(Box::new(f))),
727
728 "FILTER" => Ok(Expression::Function(Box::new(f))),
730
731 "AGGREGATE" => Ok(Expression::Function(Box::new(f))),
733
734 "SEQUENCE" => Ok(Expression::Function(Box::new(f))),
736
737 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
739 "SEQUENCE".to_string(),
740 f.args,
741 )))),
742
743 "STARTSWITH" | "STARTS_WITH" => Ok(Expression::Function(Box::new(Function::new(
745 "STARTSWITH".to_string(),
746 f.args,
747 )))),
748
749 "ENDSWITH" | "ENDS_WITH" => Ok(Expression::Function(Box::new(Function::new(
751 "ENDSWITH".to_string(),
752 f.args,
753 )))),
754
755 "ARRAY_CONSTRUCT_COMPACT" => {
757 let inner =
758 Expression::Function(Box::new(Function::new("ARRAY".to_string(), f.args)));
759 Ok(Expression::Function(Box::new(Function::new(
760 "ARRAY_COMPACT".to_string(),
761 vec![inner],
762 ))))
763 }
764
765 "ARRAY_TO_STRING" => Ok(Expression::Function(Box::new(Function::new(
767 "ARRAY_JOIN".to_string(),
768 f.args,
769 )))),
770
771 "TO_ARRAY" if f.args.len() == 1 => {
773 let x = f.args[0].clone();
774 match &x {
777 Expression::ArrayFunc(arr) => {
778 Ok(Expression::Function(Box::new(Function::new(
780 "ARRAY".to_string(),
781 arr.expressions.clone(),
782 ))))
783 }
784 _ => Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
785 condition: Expression::IsNull(Box::new(crate::expressions::IsNull {
786 this: x.clone(),
787 not: false,
788 postfix_form: false,
789 })),
790 true_value: Expression::Null(crate::expressions::Null),
791 false_value: Some(Expression::Function(Box::new(Function::new(
792 "ARRAY".to_string(),
793 vec![x],
794 )))),
795 original_name: Some("IF".to_string()),
796 inferred_type: None,
797 }))),
798 }
799 }
800
801 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
803 let subject = f.args[0].clone();
804 let pattern = f.args[1].clone();
805 let group = if f.args.len() >= 6 {
808 let g = &f.args[5];
809 if matches!(g, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"))
811 {
812 None
813 } else {
814 Some(g.clone())
815 }
816 } else {
817 None
818 };
819 let mut args = vec![subject, pattern];
820 if let Some(g) = group {
821 args.push(g);
822 }
823 Ok(Expression::Function(Box::new(Function::new(
824 "REGEXP_EXTRACT".to_string(),
825 args,
826 ))))
827 }
828
829 "UUID_STRING" => {
831 if f.args.is_empty() {
832 Ok(Expression::Function(Box::new(Function::new(
833 "UUID".to_string(),
834 vec![],
835 ))))
836 } else {
837 Ok(Expression::Function(Box::new(Function::new(
838 "UUID_STRING".to_string(),
839 f.args,
840 ))))
841 }
842 }
843
844 "OBJECT_CONSTRUCT" if f.args.len() >= 2 && f.args.len() % 2 == 0 => {
846 let mut struct_args = Vec::new();
850 for pair in f.args.chunks(2) {
851 if let Expression::Literal(lit) = &pair[0] {
852 if let Literal::String(key) = lit.as_ref() {
853 struct_args.push(Expression::Alias(Box::new(
854 crate::expressions::Alias {
855 this: pair[1].clone(),
856 alias: crate::expressions::Identifier::new(key.clone()),
857 column_aliases: vec![],
858 pre_alias_comments: vec![],
859 trailing_comments: vec![],
860 inferred_type: None,
861 },
862 )));
863 }
864 } else {
865 struct_args.push(pair[1].clone());
866 }
867 }
868 Ok(Expression::Function(Box::new(Function::new(
869 "STRUCT".to_string(),
870 struct_args,
871 ))))
872 }
873
874 "DATE_PART" if f.args.len() == 2 => {
876 let mut args = f.args;
877 let part = args.remove(0);
878 let expr = args.remove(0);
879 if let Some(field) = expr_to_datetime_field(&part) {
880 Ok(Expression::Extract(Box::new(ExtractFunc {
881 this: expr,
882 field,
883 })))
884 } else {
885 Ok(Expression::Function(Box::new(Function::new(
887 "DATE_PART".to_string(),
888 vec![part, expr],
889 ))))
890 }
891 }
892
893 "GET_PATH" if f.args.len() == 2 => {
895 let mut args = f.args;
896 let this = args.remove(0);
897 let path = args.remove(0);
898 let json_path = match &path {
899 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
900 let Literal::String(s) = lit.as_ref() else {
901 unreachable!()
902 };
903 let normalized = if s.starts_with('$') {
904 s.clone()
905 } else if s.starts_with('[') {
906 format!("${}", s)
907 } else {
908 format!("$.{}", s)
909 };
910 Expression::Literal(Box::new(Literal::String(normalized)))
911 }
912 _ => path,
913 };
914 Ok(Expression::Function(Box::new(Function::new(
915 "GET_JSON_OBJECT".to_string(),
916 vec![this, json_path],
917 ))))
918 }
919
920 "BITWISE_LEFT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
922 "SHIFTLEFT".to_string(),
923 f.args,
924 )))),
925
926 "BITWISE_RIGHT_SHIFT" => Ok(Expression::Function(Box::new(Function::new(
928 "SHIFTRIGHT".to_string(),
929 f.args,
930 )))),
931
932 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
934 "APPROX_COUNT_DISTINCT".to_string(),
935 f.args,
936 )))),
937
938 "ARRAY_SLICE" => Ok(Expression::Function(Box::new(Function::new(
940 "SLICE".to_string(),
941 f.args,
942 )))),
943
944 "DATE_FROM_PARTS" => Ok(Expression::Function(Box::new(Function::new(
946 "MAKE_DATE".to_string(),
947 f.args,
948 )))),
949
950 "DAYOFWEEK_ISO" => Ok(Expression::Function(Box::new(Function::new(
952 "DAYOFWEEK".to_string(),
953 f.args,
954 )))),
955
956 "FORMAT" => Ok(Expression::Function(Box::new(Function::new(
958 "FORMAT_STRING".to_string(),
959 f.args,
960 )))),
961
962 "LOGICAL_AND" => Ok(Expression::Function(Box::new(Function::new(
964 "BOOL_AND".to_string(),
965 f.args,
966 )))),
967
968 "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
970 "VAR_POP".to_string(),
971 f.args,
972 )))),
973
974 "WEEK_OF_YEAR" => Ok(Expression::Function(Box::new(Function::new(
976 "WEEKOFYEAR".to_string(),
977 f.args,
978 )))),
979
980 "BIT_GET" => Ok(Expression::Function(Box::new(Function::new(
982 "GETBIT".to_string(),
983 f.args,
984 )))),
985
986 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
988
989 _ => Ok(Expression::Function(Box::new(f))),
991 }
992 }
993
994 fn transform_aggregate_function(
995 &self,
996 f: Box<crate::expressions::AggregateFunction>,
997 ) -> Result<Expression> {
998 let name_upper = f.name.to_uppercase();
999 match name_upper.as_str() {
1000 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1002 Function::new("COLLECT_LIST".to_string(), f.args),
1003 ))),
1004
1005 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1007 Function::new("COLLECT_LIST".to_string(), f.args),
1008 ))),
1009
1010 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
1012 "COLLECT_LIST".to_string(),
1013 f.args,
1014 )))),
1015
1016 "ARRAY_AGG" if !f.args.is_empty() => {
1018 let mut af = f;
1019 af.name = "COLLECT_LIST".to_string();
1020 Ok(Expression::AggregateFunction(af))
1021 }
1022
1023 "LOGICAL_OR" if !f.args.is_empty() => {
1025 let mut af = f;
1026 af.name = "BOOL_OR".to_string();
1027 Ok(Expression::AggregateFunction(af))
1028 }
1029
1030 _ => Ok(Expression::AggregateFunction(f)),
1032 }
1033 }
1034}
1035
1036fn expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
1038 let name = match expr {
1039 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1040 let Literal::String(s) = lit.as_ref() else {
1041 unreachable!()
1042 };
1043 s.to_uppercase()
1044 }
1045 Expression::Identifier(id) => id.name.to_uppercase(),
1046 Expression::Var(v) => v.this.to_uppercase(),
1047 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1048 _ => return None,
1049 };
1050 match name.as_str() {
1051 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
1052 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
1053 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
1054 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
1055 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
1056 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
1057 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
1058 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
1059 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
1060 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
1061 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
1062 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
1063 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
1064 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
1065 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
1066 _ => Some(DateTimeField::Custom(name)),
1067 }
1068}