1use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15 Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16 Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17 UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26 fn dialect_type(&self) -> DialectType {
27 DialectType::BigQuery
28 }
29
30 fn tokenizer_config(&self) -> TokenizerConfig {
31 let mut config = TokenizerConfig::default();
32 config.identifiers.remove(&'"');
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.quotes.insert("'''".to_string(), "'''".to_string());
40 config
41 .quotes
42 .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43 config.string_escapes = vec!['\'', '\\'];
45 config.b_prefix_is_byte_string = true;
47 config.hex_number_strings = true;
49 config.hex_string_is_integer_type = true;
51 config
52 }
53
54 fn generator_config(&self) -> GeneratorConfig {
55 use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
56 GeneratorConfig {
57 identifier_quote: '`',
58 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
59 dialect: Some(DialectType::BigQuery),
60 normalize_functions: NormalizeFunctions::None,
62 interval_allows_plural_form: false,
64 join_hints: false,
65 query_hints: false,
66 table_hints: false,
67 limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
68 rename_table_with_db: false,
69 nvl2_supported: false,
70 unnest_with_ordinality: false,
71 collate_is_func: true,
72 limit_only_literals: true,
73 supports_table_alias_columns: false,
74 unpivot_aliases_are_identifiers: false,
75 json_key_value_pair_sep: ",",
76 null_ordering_supported: false,
77 ignore_nulls_in_func: true,
78 json_path_single_quote_escape: true,
79 can_implement_array_any: true,
80 supports_to_number: false,
81 named_placeholder_token: "@",
82 hex_func: "TO_HEX",
83 with_properties_prefix: "OPTIONS",
84 supports_exploding_projections: false,
85 except_intersect_support_all_clause: false,
86 supports_unix_seconds: true,
87 try_supported: true,
89 semi_anti_join_with_side: false,
91 ..Default::default()
92 }
93 }
94
95 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
96 match expr {
97 Expression::DataType(dt) => self.transform_data_type(dt),
99
100 Expression::IfNull(f) => Ok(Expression::IfNull(f)),
103
104 Expression::Nvl(f) => Ok(Expression::IfNull(f)),
106
107 Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
109
110 Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
113 this: f.this,
114 separator: f.separator,
115 order_by: f.order_by,
116 distinct: f.distinct,
117 filter: f.filter,
118 limit: None,
119 }))),
120
121 Expression::TryCast(c) => {
127 let transformed_type = match self.transform_data_type(c.to)? {
128 Expression::DataType(dt) => dt,
129 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
130 };
131 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
132 this: c.this,
133 to: transformed_type,
134 trailing_comments: c.trailing_comments,
135 double_colon_syntax: c.double_colon_syntax,
136 format: c.format,
137 default: c.default,
138 })))
139 }
140
141 Expression::ILike(op) => {
144 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
145 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
146 Ok(Expression::Like(Box::new(LikeOp {
147 left: lower_left,
148 right: lower_right,
149 escape: op.escape,
150 quantifier: op.quantifier,
151 })))
152 }
153
154 Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
156 "REGEXP_CONTAINS".to_string(),
157 vec![f.this, f.pattern],
158 )))),
159
160 Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
163 crate::expressions::UnnestFunc {
164 this: f.this,
165 expressions: Vec::new(),
166 with_ordinality: false,
167 alias: None,
168 offset_alias: None,
169 },
170 ))),
171
172 Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
174 crate::expressions::UnnestFunc {
175 this: f.this,
176 expressions: Vec::new(),
177 with_ordinality: false,
178 alias: None,
179 offset_alias: None,
180 },
181 ))),
182
183 Expression::GenerateSeries(f) => {
185 let mut args = Vec::new();
186 if let Some(start) = f.start {
187 args.push(*start);
188 }
189 if let Some(end) = f.end {
190 args.push(*end);
191 }
192 if let Some(step) = f.step {
193 args.push(*step);
194 }
195 Ok(Expression::Function(Box::new(Function::new(
196 "GENERATE_ARRAY".to_string(),
197 args,
198 ))))
199 }
200
201 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
204 "BIT_AND".to_string(),
205 vec![f.this],
206 )))),
207
208 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
210 "BIT_OR".to_string(),
211 vec![f.this],
212 )))),
213
214 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
216 "BIT_XOR".to_string(),
217 vec![f.this],
218 )))),
219
220 Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
222 "BIT_COUNT".to_string(),
223 vec![f.this],
224 )))),
225
226 Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
228 "BYTE_LENGTH".to_string(),
229 vec![f.this],
230 )))),
231
232 Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
234 "DIV".to_string(),
235 vec![f.this, f.expression],
236 )))),
237
238 Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
240 "INT64".to_string(),
241 vec![f.this],
242 )))),
243
244 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
247 seed: None,
248 lower: None,
249 upper: None,
250 }))),
251
252 Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
255 "GENERATE_UUID".to_string(),
256 vec![],
257 )))),
258
259 Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
262 "APPROX_COUNT_DISTINCT".to_string(),
263 vec![f.this],
264 )))),
265
266 Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
268 "MAX_BY".to_string(),
269 vec![*f.this, *f.expression],
270 )))),
271
272 Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
274 "MIN_BY".to_string(),
275 vec![*f.this, *f.expression],
276 )))),
277
278 Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
281 "COUNTIF".to_string(),
282 vec![f.this],
283 )))),
284
285 Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
288
289 Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
292 "FROM_HEX".to_string(),
293 vec![*f.this],
294 )))),
295
296 Expression::UnixToTime(f) => {
298 let scale = f.scale.unwrap_or(0);
299 match scale {
300 0 => Ok(Expression::Function(Box::new(Function::new(
301 "TIMESTAMP_SECONDS".to_string(),
302 vec![*f.this],
303 )))),
304 3 => Ok(Expression::Function(Box::new(Function::new(
305 "TIMESTAMP_MILLIS".to_string(),
306 vec![*f.this],
307 )))),
308 6 => Ok(Expression::Function(Box::new(Function::new(
309 "TIMESTAMP_MICROS".to_string(),
310 vec![*f.this],
311 )))),
312 _ => {
313 let div_expr =
315 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
316 *f.this,
317 Expression::Function(Box::new(Function::new(
318 "POWER".to_string(),
319 vec![Expression::number(10), Expression::number(scale)],
320 ))),
321 )));
322 let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
323 this: div_expr,
324 to: crate::expressions::DataType::Custom {
325 name: "INT64".to_string(),
326 },
327 double_colon_syntax: false,
328 trailing_comments: vec![],
329 format: None,
330 default: None,
331 }));
332 Ok(Expression::Function(Box::new(Function::new(
333 "TIMESTAMP_SECONDS".to_string(),
334 vec![cast_expr],
335 ))))
336 }
337 }
338 }
339
340 Expression::DateDiff(f) => {
343 let unit_str = match f.unit {
345 Some(crate::expressions::IntervalUnit::Year) => "YEAR",
346 Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
347 Some(crate::expressions::IntervalUnit::Month) => "MONTH",
348 Some(crate::expressions::IntervalUnit::Week) => "WEEK",
349 Some(crate::expressions::IntervalUnit::Day) => "DAY",
350 Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
351 Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
352 Some(crate::expressions::IntervalUnit::Second) => "SECOND",
353 Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
354 Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
355 Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
356 None => "DAY",
357 };
358 let unit = Expression::Identifier(crate::expressions::Identifier {
359 name: unit_str.to_string(),
360 quoted: false,
361 trailing_comments: Vec::new(),
362 });
363 Ok(Expression::Function(Box::new(Function::new(
364 "DATE_DIFF".to_string(),
365 vec![f.this, f.expression, unit],
366 ))))
367 }
368
369 Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
372 "VAR_POP".to_string(),
373 vec![f.this],
374 )))),
375
376 Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
379 "SHA1".to_string(),
380 vec![f.this],
381 )))),
382
383 Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
385 "SHA1".to_string(),
386 vec![f.this],
387 )))),
388
389 Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
391 "MD5".to_string(),
392 vec![*f.this],
393 )))),
394
395 Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
398 "BOOL".to_string(),
399 vec![f.this],
400 )))),
401
402 Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
404 "STRING".to_string(),
405 vec![*f.this],
406 )))),
407
408 Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
411 "DATE_FROM_UNIX_DATE".to_string(),
412 vec![f.this],
413 )))),
414
415 Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
417 "UNIX_DATE".to_string(),
418 vec![f.this],
419 )))),
420
421 Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
423 "TIMESTAMP_DIFF".to_string(),
424 vec![*f.this, *f.expression],
425 )))),
426
427 Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
429 "DATETIME".to_string(),
430 vec![*f.this],
431 )))),
432
433 Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
435 "DATETIME".to_string(),
436 vec![f.this],
437 )))),
438
439 Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
441 "TIMESTAMP".to_string(),
442 vec![f.this],
443 )))),
444
445 Expression::IfFunc(f) => {
447 let mut args = vec![f.condition, f.true_value];
448 if let Some(false_val) = f.false_value {
449 args.push(false_val);
450 } else {
451 args.push(Expression::Null(crate::expressions::Null));
452 }
453 Ok(Expression::Function(Box::new(Function::new(
454 "IF".to_string(),
455 args,
456 ))))
457 }
458
459 Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
461 "FROM_HEX".to_string(),
462 vec![*f.this],
463 )))),
464
465 Expression::ApproxTopK(f) => {
468 let mut args = vec![*f.this];
469 if let Some(expr) = f.expression {
470 args.push(*expr);
471 }
472 Ok(Expression::Function(Box::new(Function::new(
473 "APPROX_TOP_COUNT".to_string(),
474 args,
475 ))))
476 }
477
478 Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
480 "SAFE_DIVIDE".to_string(),
481 vec![*f.this, *f.expression],
482 )))),
483
484 Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
486 "JSON_KEYS".to_string(),
487 vec![*f.this],
488 )))),
489
490 Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
492 "JSON_VALUE_ARRAY".to_string(),
493 vec![*f.this],
494 )))),
495
496 Expression::DateFromParts(f) => {
498 let mut args = Vec::new();
499 if let Some(y) = f.year {
500 args.push(*y);
501 }
502 if let Some(m) = f.month {
503 args.push(*m);
504 }
505 if let Some(d) = f.day {
506 args.push(*d);
507 }
508 Ok(Expression::Function(Box::new(Function::new(
509 "DATE".to_string(),
510 args,
511 ))))
512 }
513
514 Expression::Split(f) => {
517 let delimiter = match &f.delimiter {
519 Expression::Literal(Literal::String(s)) if s.is_empty() => {
520 Expression::Literal(Literal::String(",".to_string()))
521 }
522 _ => f.delimiter,
523 };
524 Ok(Expression::Split(Box::new(SplitFunc {
525 this: f.this,
526 delimiter,
527 })))
528 }
529
530 Expression::Cast(c) => {
533 use crate::expressions::DataType;
534 let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
538 || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
539 if is_json {
540 return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
541 }
542 let transformed_type = match self.transform_data_type(c.to)? {
543 Expression::DataType(dt) => dt,
544 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
545 };
546 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
547 this: c.this,
548 to: transformed_type,
549 trailing_comments: c.trailing_comments,
550 double_colon_syntax: c.double_colon_syntax,
551 format: c.format,
552 default: c.default,
553 })))
554 }
555
556 Expression::SafeCast(c) => {
558 let transformed_type = match self.transform_data_type(c.to)? {
559 Expression::DataType(dt) => dt,
560 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
561 };
562 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
563 this: c.this,
564 to: transformed_type,
565 trailing_comments: c.trailing_comments,
566 double_colon_syntax: c.double_colon_syntax,
567 format: c.format,
568 default: c.default,
569 })))
570 }
571
572 Expression::Select(mut select) => {
575 if select.group_by.is_some() && select.order_by.is_some() {
576 let aliases: Vec<(Expression, Identifier)> = select
578 .expressions
579 .iter()
580 .filter_map(|e| {
581 if let Expression::Alias(a) = e {
582 Some((a.this.clone(), a.alias.clone()))
583 } else {
584 None
585 }
586 })
587 .collect();
588
589 if let Some(ref mut group_by) = select.group_by {
590 for grouped in group_by.expressions.iter_mut() {
591 if matches!(grouped, Expression::Literal(Literal::Number(_))) {
593 continue;
594 }
595 for (expr, alias_ident) in &aliases {
597 if grouped == expr {
598 *grouped = Expression::Column(Column {
599 name: alias_ident.clone(),
600 table: None,
601 join_mark: false,
602 trailing_comments: Vec::new(),
603 });
604 break;
605 }
606 }
607 }
608 }
609 }
610 Ok(Expression::Select(select))
611 }
612
613 Expression::ArrayContains(f) => {
615 let array_expr = f.this;
616 let value_expr = f.expression;
617
618 let unnest = Expression::Unnest(Box::new(UnnestFunc {
620 this: array_expr,
621 expressions: Vec::new(),
622 with_ordinality: false,
623 alias: None,
624 offset_alias: None,
625 }));
626 let aliased_unnest = Expression::Alias(Box::new(Alias {
627 this: unnest,
628 alias: Identifier::new("_col"),
629 column_aliases: Vec::new(),
630 pre_alias_comments: Vec::new(),
631 trailing_comments: Vec::new(),
632 }));
633 let col_ref = Expression::Column(Column {
634 name: Identifier::new("_col"),
635 table: None,
636 join_mark: false,
637 trailing_comments: Vec::new(),
638 });
639 let where_clause = Where {
640 this: Expression::Eq(Box::new(BinaryOp {
641 left: col_ref,
642 right: value_expr,
643 left_comments: Vec::new(),
644 operator_comments: Vec::new(),
645 trailing_comments: Vec::new(),
646 })),
647 };
648 let inner_select = Expression::Select(Box::new(Select {
649 expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
650 from: Some(From {
651 expressions: vec![aliased_unnest],
652 }),
653 where_clause: Some(where_clause),
654 ..Default::default()
655 }));
656 Ok(Expression::Exists(Box::new(Exists {
657 this: inner_select,
658 not: false,
659 })))
660 }
661
662 Expression::JsonObject(mut f) => {
665 if f.pairs.len() == 1 {
666 let keys_exprs = match &f.pairs[0].0 {
668 Expression::Array(arr) => Some(&arr.expressions),
669 Expression::ArrayFunc(arr) => Some(&arr.expressions),
670 _ => None,
671 };
672 let vals_exprs = match &f.pairs[0].1 {
673 Expression::Array(arr) => Some(&arr.expressions),
674 Expression::ArrayFunc(arr) => Some(&arr.expressions),
675 _ => None,
676 };
677 if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
678 if keys.len() == vals.len() {
679 let new_pairs: Vec<(Expression, Expression)> = keys
680 .iter()
681 .zip(vals.iter())
682 .map(|(k, v)| (k.clone(), v.clone()))
683 .collect();
684 f.pairs = new_pairs;
685 }
686 }
687 }
688 Ok(Expression::JsonObject(f))
689 }
690
691 Expression::ModFunc(mut f) => {
694 if let Expression::Paren(paren) = f.this {
696 f.this = paren.this;
697 }
698 Ok(Expression::ModFunc(f))
699 }
700
701 Expression::JSONExtract(e) if e.variant_extract.is_some() => {
703 let path = match *e.expression {
704 Expression::Literal(Literal::String(s)) => {
705 let normalized = if s.starts_with('$') {
706 s
707 } else if s.starts_with('[') {
708 format!("${}", s)
709 } else {
710 format!("$.{}", s)
711 };
712 Expression::Literal(Literal::String(normalized))
713 }
714 other => other,
715 };
716 Ok(Expression::Function(Box::new(Function::new(
717 "JSON_EXTRACT".to_string(),
718 vec![*e.this, path],
719 ))))
720 }
721
722 Expression::Function(f) => self.transform_function(*f),
724
725 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
727
728 Expression::MethodCall(mc) => self.transform_method_call(*mc),
731
732 Expression::CreateFunction(mut cf) => {
735 if let Some(ref mut rtb) = cf.returns_table_body {
736 if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
737 let inner = if rtb.starts_with("TABLE (") {
739 &rtb["TABLE (".len()..rtb.len() - 1]
740 } else {
741 &rtb["TABLE(".len()..rtb.len() - 1]
742 };
743 let converted = inner
745 .replace(" INT,", " INT64,")
746 .replace(" INT)", " INT64)")
747 .replace(" INTEGER,", " INT64,")
748 .replace(" INTEGER)", " INT64)")
749 .replace(" FLOAT,", " FLOAT64,")
750 .replace(" FLOAT)", " FLOAT64)")
751 .replace(" BOOLEAN,", " BOOL,")
752 .replace(" BOOLEAN)", " BOOL)")
753 .replace(" VARCHAR", " STRING")
754 .replace(" TEXT", " STRING");
755 let converted = if converted.ends_with(" INT") {
757 format!("{}{}", &converted[..converted.len() - 4], " INT64")
758 } else {
759 converted
760 };
761 *rtb = format!("TABLE <{}>", converted);
762 cf.is_table_function = true;
763 }
764 }
765 if cf.is_table_function {
767 if let Some(ref body) = cf.body {
768 if matches!(body, FunctionBody::StringLiteral(_)) {
769 if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
770 if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
772 if let Some(stmt) = parsed.into_iter().next() {
773 cf.body = Some(FunctionBody::Expression(stmt));
774 } else {
775 cf.body = Some(FunctionBody::StringLiteral(sql));
776 }
777 } else {
778 cf.body = Some(FunctionBody::StringLiteral(sql));
779 }
780 }
781 }
782 }
783 }
784 Ok(Expression::CreateFunction(cf))
785 }
786
787 _ => Ok(expr),
789 }
790 }
791}
792
793impl BigQueryDialect {
794 fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
796 use crate::expressions::DataType;
797 let transformed = match dt {
798 DataType::BigInt { .. } => DataType::Custom {
800 name: "INT64".to_string(),
801 },
802 DataType::Int { .. } => DataType::Custom {
804 name: "INT64".to_string(),
805 },
806 DataType::SmallInt { .. } => DataType::Custom {
808 name: "INT64".to_string(),
809 },
810 DataType::TinyInt { .. } => DataType::Custom {
812 name: "INT64".to_string(),
813 },
814 DataType::Float { .. } => DataType::Custom {
816 name: "FLOAT64".to_string(),
817 },
818 DataType::Double { .. } => DataType::Custom {
820 name: "FLOAT64".to_string(),
821 },
822 DataType::Boolean => DataType::Custom {
824 name: "BOOL".to_string(),
825 },
826 DataType::Char { .. } => DataType::Custom {
828 name: "STRING".to_string(),
829 },
830 DataType::VarChar { .. } => DataType::Custom {
832 name: "STRING".to_string(),
833 },
834 DataType::Text => DataType::Custom {
836 name: "STRING".to_string(),
837 },
838 DataType::String { .. } => DataType::Custom {
840 name: "STRING".to_string(),
841 },
842 DataType::Binary { .. } => DataType::Custom {
844 name: "BYTES".to_string(),
845 },
846 DataType::VarBinary { .. } => DataType::Custom {
848 name: "BYTES".to_string(),
849 },
850 DataType::Blob => DataType::Custom {
852 name: "BYTES".to_string(),
853 },
854 DataType::Decimal { .. } => DataType::Custom {
856 name: "NUMERIC".to_string(),
857 },
858 DataType::Timestamp {
862 timezone: false, ..
863 } => DataType::Custom {
864 name: "TIMESTAMP".to_string(),
865 },
866 DataType::Timestamp { timezone: true, .. } => DataType::Custom {
867 name: "TIMESTAMP".to_string(),
868 },
869 DataType::Uuid => DataType::Custom {
871 name: "STRING".to_string(),
872 },
873 DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
875 DataType::Custom {
876 name: "STRUCT".to_string(),
877 }
878 }
879 DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
881 DataType::Custom {
882 name: "TIMESTAMP".to_string(),
883 }
884 }
885 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
887 DataType::Custom {
888 name: "INT64".to_string(),
889 }
890 }
891 other => other,
893 };
894 Ok(Expression::DataType(transformed))
895 }
896
897 fn transform_function(&self, f: Function) -> Result<Expression> {
898 let name_upper = f.name.to_uppercase();
899 match name_upper.as_str() {
900 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
902 original_name: None,
903 expressions: f.args,
904 }))),
905
906 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
908 original_name: None,
909 expressions: f.args,
910 }))),
911
912 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
914 original_name: None,
915 expressions: f.args,
916 }))),
917
918 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
920 Function::new("STRING_AGG".to_string(), f.args),
921 ))),
922
923 "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
925 "SUBSTRING".to_string(),
926 f.args,
927 )))),
928
929 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
931 seed: None,
932 lower: None,
933 upper: None,
934 }))),
935
936 "CURRENT_DATE" if f.args.is_empty() => {
939 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
940 }
941 "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
942 name: "CURRENT_DATE".to_string(),
943 args: f.args,
944 distinct: false,
945 trailing_comments: Vec::new(),
946 use_bracket_syntax: false,
947 no_parens: false,
948 quoted: false,
949 }))),
950
951 "NOW" => Ok(Expression::CurrentTimestamp(
953 crate::expressions::CurrentTimestamp {
954 precision: None,
955 sysdate: false,
956 },
957 )),
958
959 "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
961 "PARSE_DATE".to_string(),
962 f.args,
963 )))),
964
965 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
967 "PARSE_TIMESTAMP".to_string(),
968 f.args,
969 )))),
970
971 "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
973 "TIME".to_string(),
974 f.args,
975 )))),
976
977 "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
979 "FORMAT_DATE".to_string(),
980 f.args,
981 )))),
982
983 "POSITION" if f.args.len() == 2 => {
986 let mut args = f.args;
987 let first = args.remove(0);
989 let second = args.remove(0);
990 Ok(Expression::Function(Box::new(Function::new(
991 "STRPOS".to_string(),
992 vec![second, first],
993 ))))
994 }
995
996 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
998 f.args.into_iter().next().unwrap(),
999 )))),
1000
1001 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1003 this: f.args.into_iter().next().unwrap(),
1004 decimals: None,
1005 to: None,
1006 }))),
1007
1008 "GETDATE" => Ok(Expression::CurrentTimestamp(
1010 crate::expressions::CurrentTimestamp {
1011 precision: None,
1012 sysdate: false,
1013 },
1014 )),
1015
1016 "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1019 UnaryFunc::new(f.args.into_iter().next().unwrap()),
1020 ))),
1021
1022 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1026 "GENERATE_ARRAY".to_string(),
1027 f.args,
1028 )))),
1029
1030 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1033 "APPROX_COUNT_DISTINCT".to_string(),
1034 f.args,
1035 )))),
1036
1037 "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1039 "COUNTIF".to_string(),
1040 f.args,
1041 )))),
1042
1043 "SHA" => Ok(Expression::Function(Box::new(Function::new(
1045 "SHA1".to_string(),
1046 f.args,
1047 )))),
1048
1049 "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1051 "SHA256".to_string(),
1052 f.args,
1053 )))),
1054
1055 "MD5" => Ok(Expression::Function(Box::new(Function::new(
1058 "MD5".to_string(),
1059 f.args,
1060 )))),
1061
1062 "DATEADD" if f.args.len() == 3 => {
1067 let mut args = f.args;
1068 let unit_expr = args.remove(0);
1069 let amount = args.remove(0);
1070 let date = args.remove(0);
1071 let unit_name = match &unit_expr {
1073 Expression::Identifier(id) => id.name.to_uppercase(),
1074 _ => "DAY".to_string(),
1075 };
1076 let unit = match unit_name.as_str() {
1077 "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1078 "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1079 crate::expressions::IntervalUnit::Quarter
1080 }
1081 "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1082 "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1083 "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1084 crate::expressions::IntervalUnit::Day
1085 }
1086 "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1087 "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1088 "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1089 "MILLISECOND" | "MILLISECONDS" | "MS" => {
1090 crate::expressions::IntervalUnit::Millisecond
1091 }
1092 "MICROSECOND" | "MICROSECONDS" | "US" => {
1093 crate::expressions::IntervalUnit::Microsecond
1094 }
1095 _ => crate::expressions::IntervalUnit::Day,
1096 };
1097 Ok(Expression::DateAdd(Box::new(
1098 crate::expressions::DateAddFunc {
1099 this: date,
1100 interval: amount,
1101 unit,
1102 },
1103 )))
1104 }
1105 "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1106 "DATE_ADD".to_string(),
1107 f.args,
1108 )))),
1109
1110 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1112 "DATE_DIFF".to_string(),
1113 f.args,
1114 )))),
1115
1116 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1118 "TIMESTAMP_DIFF".to_string(),
1119 f.args,
1120 )))),
1121
1122 "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1129 "GENERATE_UUID".to_string(),
1130 vec![],
1131 )))),
1132
1133 "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1135 "EDIT_DISTANCE".to_string(),
1136 f.args,
1137 )))),
1138
1139 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1141 "UNIX_SECONDS".to_string(),
1142 f.args,
1143 )))),
1144
1145 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1147 "TIMESTAMP_SECONDS".to_string(),
1148 f.args,
1149 )))),
1150
1151 "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1153 Function::new("LENGTH".to_string(), f.args),
1154 ))),
1155
1156 "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1158 "BYTE_LENGTH".to_string(),
1159 f.args,
1160 )))),
1161
1162 "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1164 "JSON_VALUE_ARRAY".to_string(),
1165 f.args,
1166 )))),
1167
1168 "SPLIT" if f.args.len() == 1 => {
1173 let mut args = f.args;
1174 args.push(Expression::Literal(Literal::String(",".to_string())));
1175 Ok(Expression::Split(Box::new(SplitFunc {
1176 this: args.remove(0),
1177 delimiter: args.remove(0),
1178 })))
1179 }
1180
1181 "SPLIT" if f.args.len() == 2 => {
1183 let mut args = f.args;
1184 Ok(Expression::Split(Box::new(SplitFunc {
1185 this: args.remove(0),
1186 delimiter: args.remove(0),
1187 })))
1188 }
1189
1190 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1192 let args = if f.args.len() > 4 {
1194 f.args[..4].to_vec()
1195 } else {
1196 f.args
1197 };
1198 Ok(Expression::Function(Box::new(Function::new(
1199 "REGEXP_EXTRACT".to_string(),
1200 args,
1201 ))))
1202 }
1203 "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1204 "REGEXP_EXTRACT".to_string(),
1205 f.args,
1206 )))),
1207
1208 "REGEXP_REPLACE" if f.args.len() > 3 => {
1210 let args = f.args[..3].to_vec();
1211 Ok(Expression::Function(Box::new(Function::new(
1212 "REGEXP_REPLACE".to_string(),
1213 args,
1214 ))))
1215 }
1216
1217 "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1219 "JSON_OBJECT".to_string(),
1220 f.args,
1221 )))),
1222
1223 "EDITDISTANCE" if f.args.len() == 3 => {
1225 let col1 = f.args[0].clone();
1226 let col2 = f.args[1].clone();
1227 let max_dist = f.args[2].clone();
1228 Ok(Expression::Function(Box::new(Function::new(
1229 "EDIT_DISTANCE".to_string(),
1230 vec![
1231 col1,
1232 col2,
1233 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1234 name: crate::expressions::Identifier::new("max_distance".to_string()),
1235 value: max_dist,
1236 separator: crate::expressions::NamedArgSeparator::DArrow,
1237 })),
1238 ],
1239 ))))
1240 }
1241 "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1242 Function::new("EDIT_DISTANCE".to_string(), f.args),
1243 ))),
1244
1245 "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1247 "FROM_HEX".to_string(),
1248 f.args,
1249 )))),
1250
1251 "PARSE_DATE"
1254 | "PARSE_DATETIME"
1255 | "PARSE_TIMESTAMP"
1256 | "SAFE.PARSE_DATE"
1257 | "SAFE.PARSE_DATETIME"
1258 | "SAFE.PARSE_TIMESTAMP" => {
1259 let args = self.normalize_time_format_args(f.args);
1260 Ok(Expression::Function(Box::new(Function {
1261 name: f.name,
1262 args,
1263 distinct: f.distinct,
1264 no_parens: f.no_parens,
1265 trailing_comments: f.trailing_comments,
1266 quoted: f.quoted,
1267 use_bracket_syntax: f.use_bracket_syntax,
1268 })))
1269 }
1270
1271 "GET_PATH" if f.args.len() == 2 => {
1273 let mut args = f.args;
1274 let this = args.remove(0);
1275 let path = args.remove(0);
1276 let json_path = match &path {
1277 Expression::Literal(Literal::String(s)) => {
1278 let normalized = if s.starts_with('$') {
1279 s.clone()
1280 } else if s.starts_with('[') {
1281 format!("${}", s)
1282 } else {
1283 format!("$.{}", s)
1284 };
1285 Expression::Literal(Literal::String(normalized))
1286 }
1287 _ => path,
1288 };
1289 Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1290 this,
1291 path: json_path,
1292 returning: None,
1293 arrow_syntax: false,
1294 hash_arrow_syntax: false,
1295 wrapper_option: None,
1296 quotes_option: None,
1297 on_scalar_string: false,
1298 on_error: None,
1299 })))
1300 }
1301
1302 _ => Ok(Expression::Function(Box::new(f))),
1304 }
1305 }
1306
1307 fn transform_aggregate_function(
1308 &self,
1309 f: Box<crate::expressions::AggregateFunction>,
1310 ) -> Result<Expression> {
1311 let name_upper = f.name.to_uppercase();
1312 match name_upper.as_str() {
1313 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1315 Function::new("STRING_AGG".to_string(), f.args),
1316 ))),
1317
1318 _ => Ok(Expression::AggregateFunction(f)),
1320 }
1321 }
1322
1323 fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1326 use crate::expressions::MethodCall;
1327
1328 if let Expression::Column(ref col) = mc.this {
1330 if col.name.name.eq_ignore_ascii_case("SAFE") {
1331 let method_upper = mc.method.name.to_uppercase();
1332 if method_upper == "PARSE_DATE"
1333 || method_upper == "PARSE_DATETIME"
1334 || method_upper == "PARSE_TIMESTAMP"
1335 {
1336 let args = self.normalize_time_format_args(mc.args);
1338 return Ok(Expression::MethodCall(Box::new(MethodCall {
1339 this: mc.this,
1340 method: mc.method,
1341 args,
1342 })));
1343 }
1344 }
1345 }
1346
1347 Ok(Expression::MethodCall(Box::new(mc)))
1349 }
1350
1351 fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1354 args.into_iter()
1355 .enumerate()
1356 .map(|(i, arg)| {
1357 if i == 0 {
1359 if let Expression::Literal(Literal::String(s)) = arg {
1360 let normalized = self.normalize_time_format(&s);
1361 return Expression::Literal(Literal::String(normalized));
1362 }
1363 }
1364 arg
1365 })
1366 .collect()
1367 }
1368
1369 fn normalize_time_format(&self, format: &str) -> String {
1373 format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1374 }
1375}
1376
1377#[cfg(test)]
1378mod tests {
1379 use super::*;
1380 use crate::dialects::Dialect;
1381
1382 fn transpile_to_bigquery(sql: &str) -> String {
1383 let dialect = Dialect::get(DialectType::Generic);
1384 let result = dialect
1385 .transpile_to(sql, DialectType::BigQuery)
1386 .expect("Transpile failed");
1387 result[0].clone()
1388 }
1389
1390 #[test]
1391 fn test_ifnull_identity() {
1392 let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1394 assert!(
1395 result.contains("COALESCE"),
1396 "Expected COALESCE, got: {}",
1397 result
1398 );
1399 }
1400
1401 #[test]
1402 fn test_nvl_to_ifnull() {
1403 let result = transpile_to_bigquery("SELECT NVL(a, b)");
1405 assert!(
1406 result.contains("IFNULL"),
1407 "Expected IFNULL, got: {}",
1408 result
1409 );
1410 }
1411
1412 #[test]
1413 fn test_try_cast_to_safe_cast() {
1414 let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1415 assert!(
1416 result.contains("SAFE_CAST"),
1417 "Expected SAFE_CAST, got: {}",
1418 result
1419 );
1420 }
1421
1422 #[test]
1423 fn test_random_to_rand() {
1424 let result = transpile_to_bigquery("SELECT RANDOM()");
1425 assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1426 }
1427
1428 #[test]
1429 fn test_basic_select() {
1430 let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1431 assert!(result.contains("SELECT"));
1432 assert!(result.contains("FROM users"));
1433 }
1434
1435 #[test]
1436 fn test_group_concat_to_string_agg() {
1437 let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1438 assert!(
1439 result.contains("STRING_AGG"),
1440 "Expected STRING_AGG, got: {}",
1441 result
1442 );
1443 }
1444
1445 #[test]
1446 fn test_generate_series_to_generate_array() {
1447 let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1448 assert!(
1449 result.contains("GENERATE_ARRAY"),
1450 "Expected GENERATE_ARRAY, got: {}",
1451 result
1452 );
1453 }
1454
1455 #[test]
1456 fn test_backtick_identifiers() {
1457 let dialect = BigQueryDialect;
1459 let config = dialect.generator_config();
1460 assert_eq!(config.identifier_quote, '`');
1461 }
1462
1463 fn bigquery_identity(sql: &str, expected: &str) {
1464 let dialect = Dialect::get(DialectType::BigQuery);
1465 let ast = dialect.parse(sql).expect("Parse failed");
1466 let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1467 let result = dialect.generate(&transformed).expect("Generate failed");
1468 assert_eq!(result, expected, "SQL: {}", sql);
1469 }
1470
1471 #[test]
1472 fn test_cast_char_to_string() {
1473 bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1474 }
1475
1476 #[test]
1477 fn test_cast_varchar_to_string() {
1478 bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1479 }
1480
1481 #[test]
1482 fn test_cast_nchar_to_string() {
1483 bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1484 }
1485
1486 #[test]
1487 fn test_cast_nvarchar_to_string() {
1488 bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1489 }
1490
1491 #[test]
1492 fn test_cast_timestamptz_to_timestamp() {
1493 bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1494 }
1495
1496 #[test]
1497 fn test_cast_record_to_struct() {
1498 bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1499 }
1500
1501 #[test]
1502 fn test_json_literal_to_parse_json() {
1503 bigquery_identity(
1505 "SELECT JSON '\"foo\"' AS json_data",
1506 "SELECT PARSE_JSON('\"foo\"') AS json_data",
1507 );
1508 }
1509
1510 #[test]
1511 fn test_grant_as_alias_not_quoted() {
1512 bigquery_identity(
1514 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1515 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1516 );
1517 }
1518
1519 #[test]
1520 fn test_timestamp_literal_to_cast() {
1521 bigquery_identity(
1523 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1524 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1525 );
1526 }
1527
1528 #[test]
1529 fn test_date_literal_to_cast_in_extract() {
1530 bigquery_identity(
1532 "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1533 "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1534 );
1535 }
1536
1537 #[test]
1538 fn test_json_object_with_json_literals() {
1539 bigquery_identity(
1541 "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1542 "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1543 );
1544 }
1545
1546 #[test]
1551 fn test_safe_parse_date_format_normalization() {
1552 bigquery_identity(
1554 "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1555 "SAFE.PARSE_DATE('%F', '2024-01-15')",
1556 );
1557 }
1558
1559 #[test]
1560 fn test_safe_parse_datetime_format_normalization() {
1561 bigquery_identity(
1563 "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1564 "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1565 );
1566 }
1567
1568 #[test]
1569 fn test_safe_parse_timestamp_format_normalization() {
1570 bigquery_identity(
1572 "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1573 "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1574 );
1575 }
1576
1577 #[test]
1578 fn test_datetime_literal_to_cast() {
1579 bigquery_identity(
1581 "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1582 "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1583 );
1584 }
1585
1586 #[test]
1587 fn test_last_day_week_modifier_stripped() {
1588 bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1590 }
1591}