1use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15 Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16 Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17 UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26 fn dialect_type(&self) -> DialectType {
27 DialectType::BigQuery
28 }
29
30 fn tokenizer_config(&self) -> TokenizerConfig {
31 let mut config = TokenizerConfig::default();
32 config.identifiers.remove(&'"');
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.quotes.insert("'''".to_string(), "'''".to_string());
40 config
41 .quotes
42 .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43 config.string_escapes = vec!['\'', '\\'];
45 config.b_prefix_is_byte_string = true;
47 config.hex_number_strings = true;
49 config.hex_string_is_integer_type = true;
51 config.hash_comments = true;
53 config
54 }
55
56 fn generator_config(&self) -> GeneratorConfig {
57 use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58 GeneratorConfig {
59 identifier_quote: '`',
60 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61 dialect: Some(DialectType::BigQuery),
62 normalize_functions: NormalizeFunctions::None,
64 interval_allows_plural_form: false,
66 join_hints: false,
67 query_hints: false,
68 table_hints: false,
69 limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70 rename_table_with_db: false,
71 nvl2_supported: false,
72 unnest_with_ordinality: false,
73 collate_is_func: true,
74 limit_only_literals: true,
75 supports_table_alias_columns: false,
76 unpivot_aliases_are_identifiers: false,
77 json_key_value_pair_sep: ",",
78 null_ordering_supported: false,
79 ignore_nulls_in_func: true,
80 json_path_single_quote_escape: true,
81 can_implement_array_any: true,
82 supports_to_number: false,
83 named_placeholder_token: "@",
84 hex_func: "TO_HEX",
85 with_properties_prefix: "OPTIONS",
86 supports_exploding_projections: false,
87 except_intersect_support_all_clause: false,
88 supports_unix_seconds: true,
89 try_supported: true,
91 semi_anti_join_with_side: false,
93 ..Default::default()
94 }
95 }
96
97 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98 match expr {
99 Expression::DataType(dt) => self.transform_data_type(dt),
101
102 Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106 Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109 Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112 Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115 this: f.this,
116 separator: f.separator,
117 order_by: f.order_by,
118 distinct: f.distinct,
119 filter: f.filter,
120 limit: None,
121 inferred_type: None,
122 }))),
123
124 Expression::TryCast(c) => {
130 let transformed_type = match self.transform_data_type(c.to)? {
131 Expression::DataType(dt) => dt,
132 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
133 };
134 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
135 this: c.this,
136 to: transformed_type,
137 trailing_comments: c.trailing_comments,
138 double_colon_syntax: c.double_colon_syntax,
139 format: c.format,
140 default: c.default,
141 inferred_type: None,
142 })))
143 }
144
145 Expression::ILike(op) => {
148 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
149 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
150 Ok(Expression::Like(Box::new(LikeOp {
151 left: lower_left,
152 right: lower_right,
153 escape: op.escape,
154 quantifier: op.quantifier,
155 inferred_type: None,
156 })))
157 }
158
159 Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
161 "REGEXP_CONTAINS".to_string(),
162 vec![f.this, f.pattern],
163 )))),
164
165 Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
168 crate::expressions::UnnestFunc {
169 this: f.this,
170 expressions: Vec::new(),
171 with_ordinality: false,
172 alias: None,
173 offset_alias: None,
174 },
175 ))),
176
177 Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
179 crate::expressions::UnnestFunc {
180 this: f.this,
181 expressions: Vec::new(),
182 with_ordinality: false,
183 alias: None,
184 offset_alias: None,
185 },
186 ))),
187
188 Expression::GenerateSeries(f) => {
190 let mut args = Vec::new();
191 if let Some(start) = f.start {
192 args.push(*start);
193 }
194 if let Some(end) = f.end {
195 args.push(*end);
196 }
197 if let Some(step) = f.step {
198 args.push(*step);
199 }
200 Ok(Expression::Function(Box::new(Function::new(
201 "GENERATE_ARRAY".to_string(),
202 args,
203 ))))
204 }
205
206 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
209 "BIT_AND".to_string(),
210 vec![f.this],
211 )))),
212
213 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
215 "BIT_OR".to_string(),
216 vec![f.this],
217 )))),
218
219 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
221 "BIT_XOR".to_string(),
222 vec![f.this],
223 )))),
224
225 Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
227 "BIT_COUNT".to_string(),
228 vec![f.this],
229 )))),
230
231 Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
233 "BYTE_LENGTH".to_string(),
234 vec![f.this],
235 )))),
236
237 Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
239 "DIV".to_string(),
240 vec![f.this, f.expression],
241 )))),
242
243 Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
245 "INT64".to_string(),
246 vec![f.this],
247 )))),
248
249 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
252 seed: None,
253 lower: None,
254 upper: None,
255 }))),
256
257 Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
260 "GENERATE_UUID".to_string(),
261 vec![],
262 )))),
263
264 Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
267 "APPROX_COUNT_DISTINCT".to_string(),
268 vec![f.this],
269 )))),
270
271 Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
273 "MAX_BY".to_string(),
274 vec![*f.this, *f.expression],
275 )))),
276
277 Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
279 "MIN_BY".to_string(),
280 vec![*f.this, *f.expression],
281 )))),
282
283 Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
286 "COUNTIF".to_string(),
287 vec![f.this],
288 )))),
289
290 Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
293
294 Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
297 "FROM_HEX".to_string(),
298 vec![*f.this],
299 )))),
300
301 Expression::UnixToTime(f) => {
303 let scale = f.scale.unwrap_or(0);
304 match scale {
305 0 => Ok(Expression::Function(Box::new(Function::new(
306 "TIMESTAMP_SECONDS".to_string(),
307 vec![*f.this],
308 )))),
309 3 => Ok(Expression::Function(Box::new(Function::new(
310 "TIMESTAMP_MILLIS".to_string(),
311 vec![*f.this],
312 )))),
313 6 => Ok(Expression::Function(Box::new(Function::new(
314 "TIMESTAMP_MICROS".to_string(),
315 vec![*f.this],
316 )))),
317 _ => {
318 let div_expr =
320 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
321 *f.this,
322 Expression::Function(Box::new(Function::new(
323 "POWER".to_string(),
324 vec![Expression::number(10), Expression::number(scale)],
325 ))),
326 )));
327 let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
328 this: div_expr,
329 to: crate::expressions::DataType::Custom {
330 name: "INT64".to_string(),
331 },
332 double_colon_syntax: false,
333 trailing_comments: vec![],
334 format: None,
335 default: None,
336 inferred_type: None,
337 }));
338 Ok(Expression::Function(Box::new(Function::new(
339 "TIMESTAMP_SECONDS".to_string(),
340 vec![cast_expr],
341 ))))
342 }
343 }
344 }
345
346 Expression::DateDiff(f) => {
349 let unit_str = match f.unit {
351 Some(crate::expressions::IntervalUnit::Year) => "YEAR",
352 Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
353 Some(crate::expressions::IntervalUnit::Month) => "MONTH",
354 Some(crate::expressions::IntervalUnit::Week) => "WEEK",
355 Some(crate::expressions::IntervalUnit::Day) => "DAY",
356 Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
357 Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
358 Some(crate::expressions::IntervalUnit::Second) => "SECOND",
359 Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
360 Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
361 Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
362 None => "DAY",
363 };
364 let unit = Expression::Identifier(crate::expressions::Identifier {
365 name: unit_str.to_string(),
366 quoted: false,
367 trailing_comments: Vec::new(),
368 span: None,
369 });
370 Ok(Expression::Function(Box::new(Function::new(
371 "DATE_DIFF".to_string(),
372 vec![f.this, f.expression, unit],
373 ))))
374 }
375
376 Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
379 "VAR_POP".to_string(),
380 vec![f.this],
381 )))),
382
383 Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
386 "SHA1".to_string(),
387 vec![f.this],
388 )))),
389
390 Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
392 "SHA1".to_string(),
393 vec![f.this],
394 )))),
395
396 Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
398 "MD5".to_string(),
399 vec![*f.this],
400 )))),
401
402 Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
405 "BOOL".to_string(),
406 vec![f.this],
407 )))),
408
409 Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
411 "STRING".to_string(),
412 vec![*f.this],
413 )))),
414
415 Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
418 "DATE_FROM_UNIX_DATE".to_string(),
419 vec![f.this],
420 )))),
421
422 Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
424 "UNIX_DATE".to_string(),
425 vec![f.this],
426 )))),
427
428 Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
430 "TIMESTAMP_DIFF".to_string(),
431 vec![*f.this, *f.expression],
432 )))),
433
434 Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
436 "DATETIME".to_string(),
437 vec![*f.this],
438 )))),
439
440 Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
442 "DATETIME".to_string(),
443 vec![f.this],
444 )))),
445
446 Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
448 "TIMESTAMP".to_string(),
449 vec![f.this],
450 )))),
451
452 Expression::IfFunc(f) => {
454 let mut args = vec![f.condition, f.true_value];
455 if let Some(false_val) = f.false_value {
456 args.push(false_val);
457 } else {
458 args.push(Expression::Null(crate::expressions::Null));
459 }
460 Ok(Expression::Function(Box::new(Function::new(
461 "IF".to_string(),
462 args,
463 ))))
464 }
465
466 Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
468 "FROM_HEX".to_string(),
469 vec![*f.this],
470 )))),
471
472 Expression::ApproxTopK(f) => {
475 let mut args = vec![*f.this];
476 if let Some(expr) = f.expression {
477 args.push(*expr);
478 }
479 Ok(Expression::Function(Box::new(Function::new(
480 "APPROX_TOP_COUNT".to_string(),
481 args,
482 ))))
483 }
484
485 Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
487 "SAFE_DIVIDE".to_string(),
488 vec![*f.this, *f.expression],
489 )))),
490
491 Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
493 "JSON_KEYS".to_string(),
494 vec![*f.this],
495 )))),
496
497 Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
499 "JSON_VALUE_ARRAY".to_string(),
500 vec![*f.this],
501 )))),
502
503 Expression::DateFromParts(f) => {
505 let mut args = Vec::new();
506 if let Some(y) = f.year {
507 args.push(*y);
508 }
509 if let Some(m) = f.month {
510 args.push(*m);
511 }
512 if let Some(d) = f.day {
513 args.push(*d);
514 }
515 Ok(Expression::Function(Box::new(Function::new(
516 "DATE".to_string(),
517 args,
518 ))))
519 }
520
521 Expression::Split(f) => {
524 let delimiter = match &f.delimiter {
526 Expression::Literal(Literal::String(s)) if s.is_empty() => {
527 Expression::Literal(Literal::String(",".to_string()))
528 }
529 _ => f.delimiter,
530 };
531 Ok(Expression::Split(Box::new(SplitFunc {
532 this: f.this,
533 delimiter,
534 })))
535 }
536
537 Expression::Cast(c) => {
540 use crate::expressions::DataType;
541 let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
545 || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
546 if is_json {
547 return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
548 }
549 let transformed_type = match self.transform_data_type(c.to)? {
550 Expression::DataType(dt) => dt,
551 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
552 };
553 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
554 this: c.this,
555 to: transformed_type,
556 trailing_comments: c.trailing_comments,
557 double_colon_syntax: c.double_colon_syntax,
558 format: c.format,
559 default: c.default,
560 inferred_type: None,
561 })))
562 }
563
564 Expression::SafeCast(c) => {
566 let transformed_type = match self.transform_data_type(c.to)? {
567 Expression::DataType(dt) => dt,
568 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
569 };
570 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
571 this: c.this,
572 to: transformed_type,
573 trailing_comments: c.trailing_comments,
574 double_colon_syntax: c.double_colon_syntax,
575 format: c.format,
576 default: c.default,
577 inferred_type: None,
578 })))
579 }
580
581 Expression::Select(mut select) => {
584 if select.group_by.is_some() && select.order_by.is_some() {
585 let aliases: Vec<(Expression, Identifier)> = select
587 .expressions
588 .iter()
589 .filter_map(|e| {
590 if let Expression::Alias(a) = e {
591 Some((a.this.clone(), a.alias.clone()))
592 } else {
593 None
594 }
595 })
596 .collect();
597
598 if let Some(ref mut group_by) = select.group_by {
599 for grouped in group_by.expressions.iter_mut() {
600 if matches!(grouped, Expression::Literal(Literal::Number(_))) {
602 continue;
603 }
604 for (expr, alias_ident) in &aliases {
606 if grouped == expr {
607 *grouped = Expression::Column(Column {
608 name: alias_ident.clone(),
609 table: None,
610 join_mark: false,
611 trailing_comments: Vec::new(),
612 span: None,
613 inferred_type: None,
614 });
615 break;
616 }
617 }
618 }
619 }
620 }
621 Ok(Expression::Select(select))
622 }
623
624 Expression::ArrayContains(f) => {
626 let array_expr = f.this;
627 let value_expr = f.expression;
628
629 let unnest = Expression::Unnest(Box::new(UnnestFunc {
631 this: array_expr,
632 expressions: Vec::new(),
633 with_ordinality: false,
634 alias: None,
635 offset_alias: None,
636 }));
637 let aliased_unnest = Expression::Alias(Box::new(Alias {
638 this: unnest,
639 alias: Identifier::new("_col"),
640 column_aliases: Vec::new(),
641 pre_alias_comments: Vec::new(),
642 trailing_comments: Vec::new(),
643 inferred_type: None,
644 }));
645 let col_ref = Expression::Column(Column {
646 name: Identifier::new("_col"),
647 table: None,
648 join_mark: false,
649 trailing_comments: Vec::new(),
650 span: None,
651 inferred_type: None,
652 });
653 let where_clause = Where {
654 this: Expression::Eq(Box::new(BinaryOp {
655 left: col_ref,
656 right: value_expr,
657 left_comments: Vec::new(),
658 operator_comments: Vec::new(),
659 trailing_comments: Vec::new(),
660 inferred_type: None,
661 })),
662 };
663 let inner_select = Expression::Select(Box::new(Select {
664 expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
665 from: Some(From {
666 expressions: vec![aliased_unnest],
667 }),
668 where_clause: Some(where_clause),
669 ..Default::default()
670 }));
671 Ok(Expression::Exists(Box::new(Exists {
672 this: inner_select,
673 not: false,
674 })))
675 }
676
677 Expression::JsonObject(mut f) => {
680 if f.pairs.len() == 1 {
681 let keys_exprs = match &f.pairs[0].0 {
683 Expression::Array(arr) => Some(&arr.expressions),
684 Expression::ArrayFunc(arr) => Some(&arr.expressions),
685 _ => None,
686 };
687 let vals_exprs = match &f.pairs[0].1 {
688 Expression::Array(arr) => Some(&arr.expressions),
689 Expression::ArrayFunc(arr) => Some(&arr.expressions),
690 _ => None,
691 };
692 if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
693 if keys.len() == vals.len() {
694 let new_pairs: Vec<(Expression, Expression)> = keys
695 .iter()
696 .zip(vals.iter())
697 .map(|(k, v)| (k.clone(), v.clone()))
698 .collect();
699 f.pairs = new_pairs;
700 }
701 }
702 }
703 Ok(Expression::JsonObject(f))
704 }
705
706 Expression::ModFunc(mut f) => {
709 if let Expression::Paren(paren) = f.this {
711 f.this = paren.this;
712 }
713 Ok(Expression::ModFunc(f))
714 }
715
716 Expression::JSONExtract(e) if e.variant_extract.is_some() => {
718 let path = match *e.expression {
719 Expression::Literal(Literal::String(s)) => {
720 let normalized = if s.starts_with('$') {
721 s
722 } else if s.starts_with('[') {
723 format!("${}", s)
724 } else {
725 format!("$.{}", s)
726 };
727 Expression::Literal(Literal::String(normalized))
728 }
729 other => other,
730 };
731 Ok(Expression::Function(Box::new(Function::new(
732 "JSON_EXTRACT".to_string(),
733 vec![*e.this, path],
734 ))))
735 }
736
737 Expression::Function(f) => self.transform_function(*f),
739
740 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
742
743 Expression::MethodCall(mc) => self.transform_method_call(*mc),
746
747 Expression::CreateFunction(mut cf) => {
750 if let Some(ref mut rtb) = cf.returns_table_body {
751 if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
752 let inner = if rtb.starts_with("TABLE (") {
754 &rtb["TABLE (".len()..rtb.len() - 1]
755 } else {
756 &rtb["TABLE(".len()..rtb.len() - 1]
757 };
758 let converted = inner
760 .replace(" INT,", " INT64,")
761 .replace(" INT)", " INT64)")
762 .replace(" INTEGER,", " INT64,")
763 .replace(" INTEGER)", " INT64)")
764 .replace(" FLOAT,", " FLOAT64,")
765 .replace(" FLOAT)", " FLOAT64)")
766 .replace(" BOOLEAN,", " BOOL,")
767 .replace(" BOOLEAN)", " BOOL)")
768 .replace(" VARCHAR", " STRING")
769 .replace(" TEXT", " STRING");
770 let converted = if converted.ends_with(" INT") {
772 format!("{}{}", &converted[..converted.len() - 4], " INT64")
773 } else {
774 converted
775 };
776 *rtb = format!("TABLE <{}>", converted);
777 cf.is_table_function = true;
778 }
779 }
780 if cf.is_table_function {
782 if let Some(ref body) = cf.body {
783 if matches!(body, FunctionBody::StringLiteral(_)) {
784 if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
785 if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
787 if let Some(stmt) = parsed.into_iter().next() {
788 cf.body = Some(FunctionBody::Expression(stmt));
789 } else {
790 cf.body = Some(FunctionBody::StringLiteral(sql));
791 }
792 } else {
793 cf.body = Some(FunctionBody::StringLiteral(sql));
794 }
795 }
796 }
797 }
798 }
799 Ok(Expression::CreateFunction(cf))
800 }
801
802 _ => Ok(expr),
804 }
805 }
806}
807
808impl BigQueryDialect {
809 fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
811 use crate::expressions::DataType;
812 let transformed = match dt {
813 DataType::BigInt { .. } => DataType::Custom {
815 name: "INT64".to_string(),
816 },
817 DataType::Int { .. } => DataType::Custom {
819 name: "INT64".to_string(),
820 },
821 DataType::SmallInt { .. } => DataType::Custom {
823 name: "INT64".to_string(),
824 },
825 DataType::TinyInt { .. } => DataType::Custom {
827 name: "INT64".to_string(),
828 },
829 DataType::Float { .. } => DataType::Custom {
831 name: "FLOAT64".to_string(),
832 },
833 DataType::Double { .. } => DataType::Custom {
835 name: "FLOAT64".to_string(),
836 },
837 DataType::Boolean => DataType::Custom {
839 name: "BOOL".to_string(),
840 },
841 DataType::Char { .. } => DataType::Custom {
843 name: "STRING".to_string(),
844 },
845 DataType::VarChar { .. } => DataType::Custom {
847 name: "STRING".to_string(),
848 },
849 DataType::Text => DataType::Custom {
851 name: "STRING".to_string(),
852 },
853 DataType::String { .. } => DataType::Custom {
855 name: "STRING".to_string(),
856 },
857 DataType::Binary { .. } => DataType::Custom {
859 name: "BYTES".to_string(),
860 },
861 DataType::VarBinary { .. } => DataType::Custom {
863 name: "BYTES".to_string(),
864 },
865 DataType::Blob => DataType::Custom {
867 name: "BYTES".to_string(),
868 },
869 DataType::Decimal { .. } => DataType::Custom {
871 name: "NUMERIC".to_string(),
872 },
873 DataType::Timestamp {
877 timezone: false, ..
878 } => DataType::Custom {
879 name: "TIMESTAMP".to_string(),
880 },
881 DataType::Timestamp { timezone: true, .. } => DataType::Custom {
882 name: "TIMESTAMP".to_string(),
883 },
884 DataType::Uuid => DataType::Custom {
886 name: "STRING".to_string(),
887 },
888 DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
890 DataType::Custom {
891 name: "STRUCT".to_string(),
892 }
893 }
894 DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
896 DataType::Custom {
897 name: "TIMESTAMP".to_string(),
898 }
899 }
900 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
902 DataType::Custom {
903 name: "INT64".to_string(),
904 }
905 }
906 other => other,
908 };
909 Ok(Expression::DataType(transformed))
910 }
911
912 fn transform_function(&self, f: Function) -> Result<Expression> {
913 let name_upper = f.name.to_uppercase();
914 match name_upper.as_str() {
915 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
917 original_name: None,
918 expressions: f.args,
919 inferred_type: None,
920 }))),
921
922 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
924 original_name: None,
925 expressions: f.args,
926 inferred_type: None,
927 }))),
928
929 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
931 original_name: None,
932 expressions: f.args,
933 inferred_type: None,
934 }))),
935
936 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
938 Function::new("STRING_AGG".to_string(), f.args),
939 ))),
940
941 "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
943 "SUBSTRING".to_string(),
944 f.args,
945 )))),
946
947 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
949 seed: None,
950 lower: None,
951 upper: None,
952 }))),
953
954 "CURRENT_DATE" if f.args.is_empty() => {
957 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
958 }
959 "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
960 name: "CURRENT_DATE".to_string(),
961 args: f.args,
962 distinct: false,
963 trailing_comments: Vec::new(),
964 use_bracket_syntax: false,
965 no_parens: false,
966 quoted: false,
967 span: None,
968 inferred_type: None,
969 }))),
970
971 "NOW" => Ok(Expression::CurrentTimestamp(
973 crate::expressions::CurrentTimestamp {
974 precision: None,
975 sysdate: false,
976 },
977 )),
978
979 "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
981 "PARSE_DATE".to_string(),
982 f.args,
983 )))),
984
985 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
987 "PARSE_TIMESTAMP".to_string(),
988 f.args,
989 )))),
990
991 "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
993 "TIME".to_string(),
994 f.args,
995 )))),
996
997 "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
999 "FORMAT_DATE".to_string(),
1000 f.args,
1001 )))),
1002
1003 "POSITION" if f.args.len() == 2 => {
1006 let mut args = f.args;
1007 let first = args.remove(0);
1009 let second = args.remove(0);
1010 Ok(Expression::Function(Box::new(Function::new(
1011 "STRPOS".to_string(),
1012 vec![second, first],
1013 ))))
1014 }
1015
1016 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1018 f.args.into_iter().next().unwrap(),
1019 )))),
1020
1021 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1023 this: f.args.into_iter().next().unwrap(),
1024 decimals: None,
1025 to: None,
1026 }))),
1027
1028 "GETDATE" => Ok(Expression::CurrentTimestamp(
1030 crate::expressions::CurrentTimestamp {
1031 precision: None,
1032 sysdate: false,
1033 },
1034 )),
1035
1036 "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1039 UnaryFunc::new(f.args.into_iter().next().unwrap()),
1040 ))),
1041
1042 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1046 "GENERATE_ARRAY".to_string(),
1047 f.args,
1048 )))),
1049
1050 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1053 "APPROX_COUNT_DISTINCT".to_string(),
1054 f.args,
1055 )))),
1056
1057 "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1059 "COUNTIF".to_string(),
1060 f.args,
1061 )))),
1062
1063 "SHA" => Ok(Expression::Function(Box::new(Function::new(
1065 "SHA1".to_string(),
1066 f.args,
1067 )))),
1068
1069 "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1071 "SHA256".to_string(),
1072 f.args,
1073 )))),
1074
1075 "MD5" => Ok(Expression::Function(Box::new(Function::new(
1078 "MD5".to_string(),
1079 f.args,
1080 )))),
1081
1082 "DATEADD" if f.args.len() == 3 => {
1087 let mut args = f.args;
1088 let unit_expr = args.remove(0);
1089 let amount = args.remove(0);
1090 let date = args.remove(0);
1091 let unit_name = match &unit_expr {
1093 Expression::Identifier(id) => id.name.to_uppercase(),
1094 _ => "DAY".to_string(),
1095 };
1096 let unit = match unit_name.as_str() {
1097 "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1098 "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1099 crate::expressions::IntervalUnit::Quarter
1100 }
1101 "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1102 "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1103 "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1104 crate::expressions::IntervalUnit::Day
1105 }
1106 "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1107 "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1108 "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1109 "MILLISECOND" | "MILLISECONDS" | "MS" => {
1110 crate::expressions::IntervalUnit::Millisecond
1111 }
1112 "MICROSECOND" | "MICROSECONDS" | "US" => {
1113 crate::expressions::IntervalUnit::Microsecond
1114 }
1115 _ => crate::expressions::IntervalUnit::Day,
1116 };
1117 Ok(Expression::DateAdd(Box::new(
1118 crate::expressions::DateAddFunc {
1119 this: date,
1120 interval: amount,
1121 unit,
1122 },
1123 )))
1124 }
1125 "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1126 "DATE_ADD".to_string(),
1127 f.args,
1128 )))),
1129
1130 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1132 "DATE_DIFF".to_string(),
1133 f.args,
1134 )))),
1135
1136 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1138 "TIMESTAMP_DIFF".to_string(),
1139 f.args,
1140 )))),
1141
1142 "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1149 "GENERATE_UUID".to_string(),
1150 vec![],
1151 )))),
1152
1153 "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1155 "EDIT_DISTANCE".to_string(),
1156 f.args,
1157 )))),
1158
1159 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1161 "UNIX_SECONDS".to_string(),
1162 f.args,
1163 )))),
1164
1165 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1167 "TIMESTAMP_SECONDS".to_string(),
1168 f.args,
1169 )))),
1170
1171 "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1173 Function::new("LENGTH".to_string(), f.args),
1174 ))),
1175
1176 "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1178 "BYTE_LENGTH".to_string(),
1179 f.args,
1180 )))),
1181
1182 "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1184 "JSON_VALUE_ARRAY".to_string(),
1185 f.args,
1186 )))),
1187
1188 "SPLIT" if f.args.len() == 1 => {
1193 let mut args = f.args;
1194 args.push(Expression::Literal(Literal::String(",".to_string())));
1195 Ok(Expression::Split(Box::new(SplitFunc {
1196 this: args.remove(0),
1197 delimiter: args.remove(0),
1198 })))
1199 }
1200
1201 "SPLIT" if f.args.len() == 2 => {
1203 let mut args = f.args;
1204 Ok(Expression::Split(Box::new(SplitFunc {
1205 this: args.remove(0),
1206 delimiter: args.remove(0),
1207 })))
1208 }
1209
1210 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1212 let args = if f.args.len() > 4 {
1214 f.args[..4].to_vec()
1215 } else {
1216 f.args
1217 };
1218 Ok(Expression::Function(Box::new(Function::new(
1219 "REGEXP_EXTRACT".to_string(),
1220 args,
1221 ))))
1222 }
1223 "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1224 "REGEXP_EXTRACT".to_string(),
1225 f.args,
1226 )))),
1227
1228 "REGEXP_REPLACE" if f.args.len() > 3 => {
1230 let args = f.args[..3].to_vec();
1231 Ok(Expression::Function(Box::new(Function::new(
1232 "REGEXP_REPLACE".to_string(),
1233 args,
1234 ))))
1235 }
1236
1237 "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1239 "JSON_OBJECT".to_string(),
1240 f.args,
1241 )))),
1242
1243 "EDITDISTANCE" if f.args.len() == 3 => {
1245 let col1 = f.args[0].clone();
1246 let col2 = f.args[1].clone();
1247 let max_dist = f.args[2].clone();
1248 Ok(Expression::Function(Box::new(Function::new(
1249 "EDIT_DISTANCE".to_string(),
1250 vec![
1251 col1,
1252 col2,
1253 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1254 name: crate::expressions::Identifier::new("max_distance".to_string()),
1255 value: max_dist,
1256 separator: crate::expressions::NamedArgSeparator::DArrow,
1257 })),
1258 ],
1259 ))))
1260 }
1261 "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1262 Function::new("EDIT_DISTANCE".to_string(), f.args),
1263 ))),
1264
1265 "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1267 "FROM_HEX".to_string(),
1268 f.args,
1269 )))),
1270
1271 "PARSE_DATE"
1274 | "PARSE_DATETIME"
1275 | "PARSE_TIMESTAMP"
1276 | "SAFE.PARSE_DATE"
1277 | "SAFE.PARSE_DATETIME"
1278 | "SAFE.PARSE_TIMESTAMP" => {
1279 let args = self.normalize_time_format_args(f.args);
1280 Ok(Expression::Function(Box::new(Function {
1281 name: f.name,
1282 args,
1283 distinct: f.distinct,
1284 no_parens: f.no_parens,
1285 trailing_comments: f.trailing_comments,
1286 quoted: f.quoted,
1287 use_bracket_syntax: f.use_bracket_syntax,
1288 span: None,
1289 inferred_type: None,
1290 })))
1291 }
1292
1293 "GET_PATH" if f.args.len() == 2 => {
1295 let mut args = f.args;
1296 let this = args.remove(0);
1297 let path = args.remove(0);
1298 let json_path = match &path {
1299 Expression::Literal(Literal::String(s)) => {
1300 let normalized = if s.starts_with('$') {
1301 s.clone()
1302 } else if s.starts_with('[') {
1303 format!("${}", s)
1304 } else {
1305 format!("$.{}", s)
1306 };
1307 Expression::Literal(Literal::String(normalized))
1308 }
1309 _ => path,
1310 };
1311 Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1312 this,
1313 path: json_path,
1314 returning: None,
1315 arrow_syntax: false,
1316 hash_arrow_syntax: false,
1317 wrapper_option: None,
1318 quotes_option: None,
1319 on_scalar_string: false,
1320 on_error: None,
1321 })))
1322 }
1323
1324 _ => Ok(Expression::Function(Box::new(f))),
1326 }
1327 }
1328
1329 fn transform_aggregate_function(
1330 &self,
1331 f: Box<crate::expressions::AggregateFunction>,
1332 ) -> Result<Expression> {
1333 let name_upper = f.name.to_uppercase();
1334 match name_upper.as_str() {
1335 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1337 Function::new("STRING_AGG".to_string(), f.args),
1338 ))),
1339
1340 _ => Ok(Expression::AggregateFunction(f)),
1342 }
1343 }
1344
1345 fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1348 use crate::expressions::MethodCall;
1349
1350 if let Expression::Column(ref col) = mc.this {
1352 if col.name.name.eq_ignore_ascii_case("SAFE") {
1353 let method_upper = mc.method.name.to_uppercase();
1354 if method_upper == "PARSE_DATE"
1355 || method_upper == "PARSE_DATETIME"
1356 || method_upper == "PARSE_TIMESTAMP"
1357 {
1358 let args = self.normalize_time_format_args(mc.args);
1360 return Ok(Expression::MethodCall(Box::new(MethodCall {
1361 this: mc.this,
1362 method: mc.method,
1363 args,
1364 })));
1365 }
1366 }
1367 }
1368
1369 Ok(Expression::MethodCall(Box::new(mc)))
1371 }
1372
1373 fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1376 args.into_iter()
1377 .enumerate()
1378 .map(|(i, arg)| {
1379 if i == 0 {
1381 if let Expression::Literal(Literal::String(s)) = arg {
1382 let normalized = self.normalize_time_format(&s);
1383 return Expression::Literal(Literal::String(normalized));
1384 }
1385 }
1386 arg
1387 })
1388 .collect()
1389 }
1390
1391 fn normalize_time_format(&self, format: &str) -> String {
1395 format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1396 }
1397}
1398
1399#[cfg(test)]
1400mod tests {
1401 use super::*;
1402 use crate::dialects::Dialect;
1403 use crate::parse_one;
1404
1405 fn transpile_to_bigquery(sql: &str) -> String {
1406 let dialect = Dialect::get(DialectType::Generic);
1407 let result = dialect
1408 .transpile_to(sql, DialectType::BigQuery)
1409 .expect("Transpile failed");
1410 result[0].clone()
1411 }
1412
1413 #[test]
1414 fn test_ifnull_identity() {
1415 let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1417 assert!(
1418 result.contains("COALESCE"),
1419 "Expected COALESCE, got: {}",
1420 result
1421 );
1422 }
1423
1424 #[test]
1425 fn test_nvl_to_ifnull() {
1426 let result = transpile_to_bigquery("SELECT NVL(a, b)");
1428 assert!(
1429 result.contains("IFNULL"),
1430 "Expected IFNULL, got: {}",
1431 result
1432 );
1433 }
1434
1435 #[test]
1436 fn test_try_cast_to_safe_cast() {
1437 let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1438 assert!(
1439 result.contains("SAFE_CAST"),
1440 "Expected SAFE_CAST, got: {}",
1441 result
1442 );
1443 }
1444
1445 #[test]
1446 fn test_random_to_rand() {
1447 let result = transpile_to_bigquery("SELECT RANDOM()");
1448 assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1449 }
1450
1451 #[test]
1452 fn test_basic_select() {
1453 let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1454 assert!(result.contains("SELECT"));
1455 assert!(result.contains("FROM users"));
1456 }
1457
1458 #[test]
1459 fn test_group_concat_to_string_agg() {
1460 let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1461 assert!(
1462 result.contains("STRING_AGG"),
1463 "Expected STRING_AGG, got: {}",
1464 result
1465 );
1466 }
1467
1468 #[test]
1469 fn test_generate_series_to_generate_array() {
1470 let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1471 assert!(
1472 result.contains("GENERATE_ARRAY"),
1473 "Expected GENERATE_ARRAY, got: {}",
1474 result
1475 );
1476 }
1477
1478 #[test]
1479 fn test_backtick_identifiers() {
1480 let dialect = BigQueryDialect;
1482 let config = dialect.generator_config();
1483 assert_eq!(config.identifier_quote, '`');
1484 }
1485
1486 fn bigquery_identity(sql: &str, expected: &str) {
1487 let dialect = Dialect::get(DialectType::BigQuery);
1488 let ast = dialect.parse(sql).expect("Parse failed");
1489 let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1490 let result = dialect.generate(&transformed).expect("Generate failed");
1491 assert_eq!(result, expected, "SQL: {}", sql);
1492 }
1493
1494 #[test]
1495 fn test_cast_char_to_string() {
1496 bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1497 }
1498
1499 #[test]
1500 fn test_cast_varchar_to_string() {
1501 bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1502 }
1503
1504 #[test]
1505 fn test_cast_nchar_to_string() {
1506 bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1507 }
1508
1509 #[test]
1510 fn test_cast_nvarchar_to_string() {
1511 bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1512 }
1513
1514 #[test]
1515 fn test_cast_timestamptz_to_timestamp() {
1516 bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1517 }
1518
1519 #[test]
1520 fn test_cast_record_to_struct() {
1521 bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1522 }
1523
1524 #[test]
1525 fn test_json_literal_to_parse_json() {
1526 bigquery_identity(
1528 "SELECT JSON '\"foo\"' AS json_data",
1529 "SELECT PARSE_JSON('\"foo\"') AS json_data",
1530 );
1531 }
1532
1533 #[test]
1534 fn test_grant_as_alias_not_quoted() {
1535 bigquery_identity(
1537 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1538 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1539 );
1540 }
1541
1542 #[test]
1543 fn test_timestamp_literal_to_cast() {
1544 bigquery_identity(
1546 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1547 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1548 );
1549 }
1550
1551 #[test]
1552 fn test_date_literal_to_cast_in_extract() {
1553 bigquery_identity(
1555 "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1556 "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1557 );
1558 }
1559
1560 #[test]
1561 fn test_json_object_with_json_literals() {
1562 bigquery_identity(
1564 "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1565 "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1566 );
1567 }
1568
1569 #[test]
1574 fn test_safe_parse_date_format_normalization() {
1575 bigquery_identity(
1577 "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1578 "SAFE.PARSE_DATE('%F', '2024-01-15')",
1579 );
1580 }
1581
1582 #[test]
1583 fn test_safe_parse_datetime_format_normalization() {
1584 bigquery_identity(
1586 "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1587 "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1588 );
1589 }
1590
1591 #[test]
1592 fn test_safe_parse_timestamp_format_normalization() {
1593 bigquery_identity(
1595 "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1596 "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1597 );
1598 }
1599
1600 #[test]
1601 fn test_datetime_literal_to_cast() {
1602 bigquery_identity(
1604 "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1605 "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1606 );
1607 }
1608
1609 #[test]
1610 fn test_last_day_week_modifier_stripped() {
1611 bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1613 }
1614
1615 #[test]
1616 fn test_hash_line_comment_parses() {
1617 let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1620 assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1621 }
1622}