1use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15 Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16 Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17 UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26 fn dialect_type(&self) -> DialectType {
27 DialectType::BigQuery
28 }
29
30 fn tokenizer_config(&self) -> TokenizerConfig {
31 let mut config = TokenizerConfig::default();
32 config.identifiers.remove(&'"');
35 config.identifiers.insert('`', '`');
36 config.quotes.insert("\"".to_string(), "\"".to_string());
38 config.quotes.insert("'''".to_string(), "'''".to_string());
40 config
41 .quotes
42 .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43 config.string_escapes = vec!['\'', '\\'];
45 config.b_prefix_is_byte_string = true;
47 config.hex_number_strings = true;
49 config.hex_string_is_integer_type = true;
51 config.hash_comments = true;
53 config
54 }
55
56 fn generator_config(&self) -> GeneratorConfig {
57 use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58 GeneratorConfig {
59 identifier_quote: '`',
60 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61 dialect: Some(DialectType::BigQuery),
62 normalize_functions: NormalizeFunctions::None,
64 interval_allows_plural_form: false,
66 join_hints: false,
67 query_hints: false,
68 table_hints: false,
69 limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70 rename_table_with_db: false,
71 nvl2_supported: false,
72 unnest_with_ordinality: false,
73 collate_is_func: true,
74 limit_only_literals: true,
75 supports_table_alias_columns: false,
76 unpivot_aliases_are_identifiers: false,
77 json_key_value_pair_sep: ",",
78 null_ordering_supported: false,
79 ignore_nulls_in_func: true,
80 json_path_single_quote_escape: true,
81 can_implement_array_any: true,
82 supports_to_number: false,
83 named_placeholder_token: "@",
84 hex_func: "TO_HEX",
85 with_properties_prefix: "OPTIONS",
86 supports_exploding_projections: false,
87 except_intersect_support_all_clause: false,
88 supports_unix_seconds: true,
89 try_supported: true,
91 semi_anti_join_with_side: false,
93 ..Default::default()
94 }
95 }
96
97 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98 match expr {
99 Expression::DataType(dt) => self.transform_data_type(dt),
101
102 Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106 Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109 Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112 Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115 this: f.this,
116 separator: f.separator,
117 order_by: f.order_by,
118 distinct: f.distinct,
119 filter: f.filter,
120 limit: None,
121 }))),
122
123 Expression::TryCast(c) => {
129 let transformed_type = match self.transform_data_type(c.to)? {
130 Expression::DataType(dt) => dt,
131 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
132 };
133 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
134 this: c.this,
135 to: transformed_type,
136 trailing_comments: c.trailing_comments,
137 double_colon_syntax: c.double_colon_syntax,
138 format: c.format,
139 default: c.default,
140 inferred_type: None,
141 })))
142 }
143
144 Expression::ILike(op) => {
147 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
148 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
149 Ok(Expression::Like(Box::new(LikeOp {
150 left: lower_left,
151 right: lower_right,
152 escape: op.escape,
153 quantifier: op.quantifier,
154 inferred_type: None,
155 })))
156 }
157
158 Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
160 "REGEXP_CONTAINS".to_string(),
161 vec![f.this, f.pattern],
162 )))),
163
164 Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
167 crate::expressions::UnnestFunc {
168 this: f.this,
169 expressions: Vec::new(),
170 with_ordinality: false,
171 alias: None,
172 offset_alias: None,
173 },
174 ))),
175
176 Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
178 crate::expressions::UnnestFunc {
179 this: f.this,
180 expressions: Vec::new(),
181 with_ordinality: false,
182 alias: None,
183 offset_alias: None,
184 },
185 ))),
186
187 Expression::GenerateSeries(f) => {
189 let mut args = Vec::new();
190 if let Some(start) = f.start {
191 args.push(*start);
192 }
193 if let Some(end) = f.end {
194 args.push(*end);
195 }
196 if let Some(step) = f.step {
197 args.push(*step);
198 }
199 Ok(Expression::Function(Box::new(Function::new(
200 "GENERATE_ARRAY".to_string(),
201 args,
202 ))))
203 }
204
205 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
208 "BIT_AND".to_string(),
209 vec![f.this],
210 )))),
211
212 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
214 "BIT_OR".to_string(),
215 vec![f.this],
216 )))),
217
218 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
220 "BIT_XOR".to_string(),
221 vec![f.this],
222 )))),
223
224 Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
226 "BIT_COUNT".to_string(),
227 vec![f.this],
228 )))),
229
230 Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
232 "BYTE_LENGTH".to_string(),
233 vec![f.this],
234 )))),
235
236 Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
238 "DIV".to_string(),
239 vec![f.this, f.expression],
240 )))),
241
242 Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
244 "INT64".to_string(),
245 vec![f.this],
246 )))),
247
248 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
251 seed: None,
252 lower: None,
253 upper: None,
254 }))),
255
256 Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
259 "GENERATE_UUID".to_string(),
260 vec![],
261 )))),
262
263 Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
266 "APPROX_COUNT_DISTINCT".to_string(),
267 vec![f.this],
268 )))),
269
270 Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
272 "MAX_BY".to_string(),
273 vec![*f.this, *f.expression],
274 )))),
275
276 Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
278 "MIN_BY".to_string(),
279 vec![*f.this, *f.expression],
280 )))),
281
282 Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
285 "COUNTIF".to_string(),
286 vec![f.this],
287 )))),
288
289 Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
292
293 Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
296 "FROM_HEX".to_string(),
297 vec![*f.this],
298 )))),
299
300 Expression::UnixToTime(f) => {
302 let scale = f.scale.unwrap_or(0);
303 match scale {
304 0 => Ok(Expression::Function(Box::new(Function::new(
305 "TIMESTAMP_SECONDS".to_string(),
306 vec![*f.this],
307 )))),
308 3 => Ok(Expression::Function(Box::new(Function::new(
309 "TIMESTAMP_MILLIS".to_string(),
310 vec![*f.this],
311 )))),
312 6 => Ok(Expression::Function(Box::new(Function::new(
313 "TIMESTAMP_MICROS".to_string(),
314 vec![*f.this],
315 )))),
316 _ => {
317 let div_expr =
319 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
320 *f.this,
321 Expression::Function(Box::new(Function::new(
322 "POWER".to_string(),
323 vec![Expression::number(10), Expression::number(scale)],
324 ))),
325 )));
326 let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
327 this: div_expr,
328 to: crate::expressions::DataType::Custom {
329 name: "INT64".to_string(),
330 },
331 double_colon_syntax: false,
332 trailing_comments: vec![],
333 format: None,
334 default: None,
335 inferred_type: None,
336 }));
337 Ok(Expression::Function(Box::new(Function::new(
338 "TIMESTAMP_SECONDS".to_string(),
339 vec![cast_expr],
340 ))))
341 }
342 }
343 }
344
345 Expression::DateDiff(f) => {
348 let unit_str = match f.unit {
350 Some(crate::expressions::IntervalUnit::Year) => "YEAR",
351 Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
352 Some(crate::expressions::IntervalUnit::Month) => "MONTH",
353 Some(crate::expressions::IntervalUnit::Week) => "WEEK",
354 Some(crate::expressions::IntervalUnit::Day) => "DAY",
355 Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
356 Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
357 Some(crate::expressions::IntervalUnit::Second) => "SECOND",
358 Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
359 Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
360 Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
361 None => "DAY",
362 };
363 let unit = Expression::Identifier(crate::expressions::Identifier {
364 name: unit_str.to_string(),
365 quoted: false,
366 trailing_comments: Vec::new(),
367 span: None,
368 });
369 Ok(Expression::Function(Box::new(Function::new(
370 "DATE_DIFF".to_string(),
371 vec![f.this, f.expression, unit],
372 ))))
373 }
374
375 Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
378 "VAR_POP".to_string(),
379 vec![f.this],
380 )))),
381
382 Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
385 "SHA1".to_string(),
386 vec![f.this],
387 )))),
388
389 Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
391 "SHA1".to_string(),
392 vec![f.this],
393 )))),
394
395 Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
397 "MD5".to_string(),
398 vec![*f.this],
399 )))),
400
401 Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
404 "BOOL".to_string(),
405 vec![f.this],
406 )))),
407
408 Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
410 "STRING".to_string(),
411 vec![*f.this],
412 )))),
413
414 Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
417 "DATE_FROM_UNIX_DATE".to_string(),
418 vec![f.this],
419 )))),
420
421 Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
423 "UNIX_DATE".to_string(),
424 vec![f.this],
425 )))),
426
427 Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
429 "TIMESTAMP_DIFF".to_string(),
430 vec![*f.this, *f.expression],
431 )))),
432
433 Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
435 "DATETIME".to_string(),
436 vec![*f.this],
437 )))),
438
439 Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
441 "DATETIME".to_string(),
442 vec![f.this],
443 )))),
444
445 Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
447 "TIMESTAMP".to_string(),
448 vec![f.this],
449 )))),
450
451 Expression::IfFunc(f) => {
453 let mut args = vec![f.condition, f.true_value];
454 if let Some(false_val) = f.false_value {
455 args.push(false_val);
456 } else {
457 args.push(Expression::Null(crate::expressions::Null));
458 }
459 Ok(Expression::Function(Box::new(Function::new(
460 "IF".to_string(),
461 args,
462 ))))
463 }
464
465 Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
467 "FROM_HEX".to_string(),
468 vec![*f.this],
469 )))),
470
471 Expression::ApproxTopK(f) => {
474 let mut args = vec![*f.this];
475 if let Some(expr) = f.expression {
476 args.push(*expr);
477 }
478 Ok(Expression::Function(Box::new(Function::new(
479 "APPROX_TOP_COUNT".to_string(),
480 args,
481 ))))
482 }
483
484 Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
486 "SAFE_DIVIDE".to_string(),
487 vec![*f.this, *f.expression],
488 )))),
489
490 Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
492 "JSON_KEYS".to_string(),
493 vec![*f.this],
494 )))),
495
496 Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
498 "JSON_VALUE_ARRAY".to_string(),
499 vec![*f.this],
500 )))),
501
502 Expression::DateFromParts(f) => {
504 let mut args = Vec::new();
505 if let Some(y) = f.year {
506 args.push(*y);
507 }
508 if let Some(m) = f.month {
509 args.push(*m);
510 }
511 if let Some(d) = f.day {
512 args.push(*d);
513 }
514 Ok(Expression::Function(Box::new(Function::new(
515 "DATE".to_string(),
516 args,
517 ))))
518 }
519
520 Expression::Split(f) => {
523 let delimiter = match &f.delimiter {
525 Expression::Literal(Literal::String(s)) if s.is_empty() => {
526 Expression::Literal(Literal::String(",".to_string()))
527 }
528 _ => f.delimiter,
529 };
530 Ok(Expression::Split(Box::new(SplitFunc {
531 this: f.this,
532 delimiter,
533 })))
534 }
535
536 Expression::Cast(c) => {
539 use crate::expressions::DataType;
540 let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
544 || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
545 if is_json {
546 return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
547 }
548 let transformed_type = match self.transform_data_type(c.to)? {
549 Expression::DataType(dt) => dt,
550 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
551 };
552 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
553 this: c.this,
554 to: transformed_type,
555 trailing_comments: c.trailing_comments,
556 double_colon_syntax: c.double_colon_syntax,
557 format: c.format,
558 default: c.default,
559 inferred_type: None,
560 })))
561 }
562
563 Expression::SafeCast(c) => {
565 let transformed_type = match self.transform_data_type(c.to)? {
566 Expression::DataType(dt) => dt,
567 _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
568 };
569 Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
570 this: c.this,
571 to: transformed_type,
572 trailing_comments: c.trailing_comments,
573 double_colon_syntax: c.double_colon_syntax,
574 format: c.format,
575 default: c.default,
576 inferred_type: None,
577 })))
578 }
579
580 Expression::Select(mut select) => {
583 if select.group_by.is_some() && select.order_by.is_some() {
584 let aliases: Vec<(Expression, Identifier)> = select
586 .expressions
587 .iter()
588 .filter_map(|e| {
589 if let Expression::Alias(a) = e {
590 Some((a.this.clone(), a.alias.clone()))
591 } else {
592 None
593 }
594 })
595 .collect();
596
597 if let Some(ref mut group_by) = select.group_by {
598 for grouped in group_by.expressions.iter_mut() {
599 if matches!(grouped, Expression::Literal(Literal::Number(_))) {
601 continue;
602 }
603 for (expr, alias_ident) in &aliases {
605 if grouped == expr {
606 *grouped = Expression::Column(Column {
607 name: alias_ident.clone(),
608 table: None,
609 join_mark: false,
610 trailing_comments: Vec::new(),
611 span: None,
612 inferred_type: None,
613 });
614 break;
615 }
616 }
617 }
618 }
619 }
620 Ok(Expression::Select(select))
621 }
622
623 Expression::ArrayContains(f) => {
625 let array_expr = f.this;
626 let value_expr = f.expression;
627
628 let unnest = Expression::Unnest(Box::new(UnnestFunc {
630 this: array_expr,
631 expressions: Vec::new(),
632 with_ordinality: false,
633 alias: None,
634 offset_alias: None,
635 }));
636 let aliased_unnest = Expression::Alias(Box::new(Alias {
637 this: unnest,
638 alias: Identifier::new("_col"),
639 column_aliases: Vec::new(),
640 pre_alias_comments: Vec::new(),
641 trailing_comments: Vec::new(),
642 inferred_type: None,
643 }));
644 let col_ref = Expression::Column(Column {
645 name: Identifier::new("_col"),
646 table: None,
647 join_mark: false,
648 trailing_comments: Vec::new(),
649 span: None,
650 inferred_type: None,
651 });
652 let where_clause = Where {
653 this: Expression::Eq(Box::new(BinaryOp {
654 left: col_ref,
655 right: value_expr,
656 left_comments: Vec::new(),
657 operator_comments: Vec::new(),
658 trailing_comments: Vec::new(),
659 inferred_type: None,
660 })),
661 };
662 let inner_select = Expression::Select(Box::new(Select {
663 expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
664 from: Some(From {
665 expressions: vec![aliased_unnest],
666 }),
667 where_clause: Some(where_clause),
668 ..Default::default()
669 }));
670 Ok(Expression::Exists(Box::new(Exists {
671 this: inner_select,
672 not: false,
673 })))
674 }
675
676 Expression::JsonObject(mut f) => {
679 if f.pairs.len() == 1 {
680 let keys_exprs = match &f.pairs[0].0 {
682 Expression::Array(arr) => Some(&arr.expressions),
683 Expression::ArrayFunc(arr) => Some(&arr.expressions),
684 _ => None,
685 };
686 let vals_exprs = match &f.pairs[0].1 {
687 Expression::Array(arr) => Some(&arr.expressions),
688 Expression::ArrayFunc(arr) => Some(&arr.expressions),
689 _ => None,
690 };
691 if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
692 if keys.len() == vals.len() {
693 let new_pairs: Vec<(Expression, Expression)> = keys
694 .iter()
695 .zip(vals.iter())
696 .map(|(k, v)| (k.clone(), v.clone()))
697 .collect();
698 f.pairs = new_pairs;
699 }
700 }
701 }
702 Ok(Expression::JsonObject(f))
703 }
704
705 Expression::ModFunc(mut f) => {
708 if let Expression::Paren(paren) = f.this {
710 f.this = paren.this;
711 }
712 Ok(Expression::ModFunc(f))
713 }
714
715 Expression::JSONExtract(e) if e.variant_extract.is_some() => {
717 let path = match *e.expression {
718 Expression::Literal(Literal::String(s)) => {
719 let normalized = if s.starts_with('$') {
720 s
721 } else if s.starts_with('[') {
722 format!("${}", s)
723 } else {
724 format!("$.{}", s)
725 };
726 Expression::Literal(Literal::String(normalized))
727 }
728 other => other,
729 };
730 Ok(Expression::Function(Box::new(Function::new(
731 "JSON_EXTRACT".to_string(),
732 vec![*e.this, path],
733 ))))
734 }
735
736 Expression::Function(f) => self.transform_function(*f),
738
739 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
741
742 Expression::MethodCall(mc) => self.transform_method_call(*mc),
745
746 Expression::CreateFunction(mut cf) => {
749 if let Some(ref mut rtb) = cf.returns_table_body {
750 if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
751 let inner = if rtb.starts_with("TABLE (") {
753 &rtb["TABLE (".len()..rtb.len() - 1]
754 } else {
755 &rtb["TABLE(".len()..rtb.len() - 1]
756 };
757 let converted = inner
759 .replace(" INT,", " INT64,")
760 .replace(" INT)", " INT64)")
761 .replace(" INTEGER,", " INT64,")
762 .replace(" INTEGER)", " INT64)")
763 .replace(" FLOAT,", " FLOAT64,")
764 .replace(" FLOAT)", " FLOAT64)")
765 .replace(" BOOLEAN,", " BOOL,")
766 .replace(" BOOLEAN)", " BOOL)")
767 .replace(" VARCHAR", " STRING")
768 .replace(" TEXT", " STRING");
769 let converted = if converted.ends_with(" INT") {
771 format!("{}{}", &converted[..converted.len() - 4], " INT64")
772 } else {
773 converted
774 };
775 *rtb = format!("TABLE <{}>", converted);
776 cf.is_table_function = true;
777 }
778 }
779 if cf.is_table_function {
781 if let Some(ref body) = cf.body {
782 if matches!(body, FunctionBody::StringLiteral(_)) {
783 if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
784 if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
786 if let Some(stmt) = parsed.into_iter().next() {
787 cf.body = Some(FunctionBody::Expression(stmt));
788 } else {
789 cf.body = Some(FunctionBody::StringLiteral(sql));
790 }
791 } else {
792 cf.body = Some(FunctionBody::StringLiteral(sql));
793 }
794 }
795 }
796 }
797 }
798 Ok(Expression::CreateFunction(cf))
799 }
800
801 _ => Ok(expr),
803 }
804 }
805}
806
807impl BigQueryDialect {
808 fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
810 use crate::expressions::DataType;
811 let transformed = match dt {
812 DataType::BigInt { .. } => DataType::Custom {
814 name: "INT64".to_string(),
815 },
816 DataType::Int { .. } => DataType::Custom {
818 name: "INT64".to_string(),
819 },
820 DataType::SmallInt { .. } => DataType::Custom {
822 name: "INT64".to_string(),
823 },
824 DataType::TinyInt { .. } => DataType::Custom {
826 name: "INT64".to_string(),
827 },
828 DataType::Float { .. } => DataType::Custom {
830 name: "FLOAT64".to_string(),
831 },
832 DataType::Double { .. } => DataType::Custom {
834 name: "FLOAT64".to_string(),
835 },
836 DataType::Boolean => DataType::Custom {
838 name: "BOOL".to_string(),
839 },
840 DataType::Char { .. } => DataType::Custom {
842 name: "STRING".to_string(),
843 },
844 DataType::VarChar { .. } => DataType::Custom {
846 name: "STRING".to_string(),
847 },
848 DataType::Text => DataType::Custom {
850 name: "STRING".to_string(),
851 },
852 DataType::String { .. } => DataType::Custom {
854 name: "STRING".to_string(),
855 },
856 DataType::Binary { .. } => DataType::Custom {
858 name: "BYTES".to_string(),
859 },
860 DataType::VarBinary { .. } => DataType::Custom {
862 name: "BYTES".to_string(),
863 },
864 DataType::Blob => DataType::Custom {
866 name: "BYTES".to_string(),
867 },
868 DataType::Decimal { .. } => DataType::Custom {
870 name: "NUMERIC".to_string(),
871 },
872 DataType::Timestamp {
876 timezone: false, ..
877 } => DataType::Custom {
878 name: "TIMESTAMP".to_string(),
879 },
880 DataType::Timestamp { timezone: true, .. } => DataType::Custom {
881 name: "TIMESTAMP".to_string(),
882 },
883 DataType::Uuid => DataType::Custom {
885 name: "STRING".to_string(),
886 },
887 DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
889 DataType::Custom {
890 name: "STRUCT".to_string(),
891 }
892 }
893 DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
895 DataType::Custom {
896 name: "TIMESTAMP".to_string(),
897 }
898 }
899 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
901 DataType::Custom {
902 name: "INT64".to_string(),
903 }
904 }
905 other => other,
907 };
908 Ok(Expression::DataType(transformed))
909 }
910
911 fn transform_function(&self, f: Function) -> Result<Expression> {
912 let name_upper = f.name.to_uppercase();
913 match name_upper.as_str() {
914 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
916 original_name: None,
917 expressions: f.args,
918 inferred_type: None,
919 }))),
920
921 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
923 original_name: None,
924 expressions: f.args,
925 inferred_type: None,
926 }))),
927
928 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
930 original_name: None,
931 expressions: f.args,
932 inferred_type: None,
933 }))),
934
935 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
937 Function::new("STRING_AGG".to_string(), f.args),
938 ))),
939
940 "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
942 "SUBSTRING".to_string(),
943 f.args,
944 )))),
945
946 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
948 seed: None,
949 lower: None,
950 upper: None,
951 }))),
952
953 "CURRENT_DATE" if f.args.is_empty() => {
956 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
957 }
958 "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
959 name: "CURRENT_DATE".to_string(),
960 args: f.args,
961 distinct: false,
962 trailing_comments: Vec::new(),
963 use_bracket_syntax: false,
964 no_parens: false,
965 quoted: false,
966 span: None,
967 inferred_type: None,
968 }))),
969
970 "NOW" => Ok(Expression::CurrentTimestamp(
972 crate::expressions::CurrentTimestamp {
973 precision: None,
974 sysdate: false,
975 },
976 )),
977
978 "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
980 "PARSE_DATE".to_string(),
981 f.args,
982 )))),
983
984 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
986 "PARSE_TIMESTAMP".to_string(),
987 f.args,
988 )))),
989
990 "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
992 "TIME".to_string(),
993 f.args,
994 )))),
995
996 "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
998 "FORMAT_DATE".to_string(),
999 f.args,
1000 )))),
1001
1002 "POSITION" if f.args.len() == 2 => {
1005 let mut args = f.args;
1006 let first = args.remove(0);
1008 let second = args.remove(0);
1009 Ok(Expression::Function(Box::new(Function::new(
1010 "STRPOS".to_string(),
1011 vec![second, first],
1012 ))))
1013 }
1014
1015 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1017 f.args.into_iter().next().unwrap(),
1018 )))),
1019
1020 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1022 this: f.args.into_iter().next().unwrap(),
1023 decimals: None,
1024 to: None,
1025 }))),
1026
1027 "GETDATE" => Ok(Expression::CurrentTimestamp(
1029 crate::expressions::CurrentTimestamp {
1030 precision: None,
1031 sysdate: false,
1032 },
1033 )),
1034
1035 "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1038 UnaryFunc::new(f.args.into_iter().next().unwrap()),
1039 ))),
1040
1041 "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1045 "GENERATE_ARRAY".to_string(),
1046 f.args,
1047 )))),
1048
1049 "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1052 "APPROX_COUNT_DISTINCT".to_string(),
1053 f.args,
1054 )))),
1055
1056 "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1058 "COUNTIF".to_string(),
1059 f.args,
1060 )))),
1061
1062 "SHA" => Ok(Expression::Function(Box::new(Function::new(
1064 "SHA1".to_string(),
1065 f.args,
1066 )))),
1067
1068 "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1070 "SHA256".to_string(),
1071 f.args,
1072 )))),
1073
1074 "MD5" => Ok(Expression::Function(Box::new(Function::new(
1077 "MD5".to_string(),
1078 f.args,
1079 )))),
1080
1081 "DATEADD" if f.args.len() == 3 => {
1086 let mut args = f.args;
1087 let unit_expr = args.remove(0);
1088 let amount = args.remove(0);
1089 let date = args.remove(0);
1090 let unit_name = match &unit_expr {
1092 Expression::Identifier(id) => id.name.to_uppercase(),
1093 _ => "DAY".to_string(),
1094 };
1095 let unit = match unit_name.as_str() {
1096 "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1097 "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1098 crate::expressions::IntervalUnit::Quarter
1099 }
1100 "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1101 "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1102 "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1103 crate::expressions::IntervalUnit::Day
1104 }
1105 "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1106 "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1107 "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1108 "MILLISECOND" | "MILLISECONDS" | "MS" => {
1109 crate::expressions::IntervalUnit::Millisecond
1110 }
1111 "MICROSECOND" | "MICROSECONDS" | "US" => {
1112 crate::expressions::IntervalUnit::Microsecond
1113 }
1114 _ => crate::expressions::IntervalUnit::Day,
1115 };
1116 Ok(Expression::DateAdd(Box::new(
1117 crate::expressions::DateAddFunc {
1118 this: date,
1119 interval: amount,
1120 unit,
1121 },
1122 )))
1123 }
1124 "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1125 "DATE_ADD".to_string(),
1126 f.args,
1127 )))),
1128
1129 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1131 "DATE_DIFF".to_string(),
1132 f.args,
1133 )))),
1134
1135 "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1137 "TIMESTAMP_DIFF".to_string(),
1138 f.args,
1139 )))),
1140
1141 "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1148 "GENERATE_UUID".to_string(),
1149 vec![],
1150 )))),
1151
1152 "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1154 "EDIT_DISTANCE".to_string(),
1155 f.args,
1156 )))),
1157
1158 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1160 "UNIX_SECONDS".to_string(),
1161 f.args,
1162 )))),
1163
1164 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1166 "TIMESTAMP_SECONDS".to_string(),
1167 f.args,
1168 )))),
1169
1170 "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1172 Function::new("LENGTH".to_string(), f.args),
1173 ))),
1174
1175 "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1177 "BYTE_LENGTH".to_string(),
1178 f.args,
1179 )))),
1180
1181 "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1183 "JSON_VALUE_ARRAY".to_string(),
1184 f.args,
1185 )))),
1186
1187 "SPLIT" if f.args.len() == 1 => {
1192 let mut args = f.args;
1193 args.push(Expression::Literal(Literal::String(",".to_string())));
1194 Ok(Expression::Split(Box::new(SplitFunc {
1195 this: args.remove(0),
1196 delimiter: args.remove(0),
1197 })))
1198 }
1199
1200 "SPLIT" if f.args.len() == 2 => {
1202 let mut args = f.args;
1203 Ok(Expression::Split(Box::new(SplitFunc {
1204 this: args.remove(0),
1205 delimiter: args.remove(0),
1206 })))
1207 }
1208
1209 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1211 let args = if f.args.len() > 4 {
1213 f.args[..4].to_vec()
1214 } else {
1215 f.args
1216 };
1217 Ok(Expression::Function(Box::new(Function::new(
1218 "REGEXP_EXTRACT".to_string(),
1219 args,
1220 ))))
1221 }
1222 "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1223 "REGEXP_EXTRACT".to_string(),
1224 f.args,
1225 )))),
1226
1227 "REGEXP_REPLACE" if f.args.len() > 3 => {
1229 let args = f.args[..3].to_vec();
1230 Ok(Expression::Function(Box::new(Function::new(
1231 "REGEXP_REPLACE".to_string(),
1232 args,
1233 ))))
1234 }
1235
1236 "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1238 "JSON_OBJECT".to_string(),
1239 f.args,
1240 )))),
1241
1242 "EDITDISTANCE" if f.args.len() == 3 => {
1244 let col1 = f.args[0].clone();
1245 let col2 = f.args[1].clone();
1246 let max_dist = f.args[2].clone();
1247 Ok(Expression::Function(Box::new(Function::new(
1248 "EDIT_DISTANCE".to_string(),
1249 vec![
1250 col1,
1251 col2,
1252 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1253 name: crate::expressions::Identifier::new("max_distance".to_string()),
1254 value: max_dist,
1255 separator: crate::expressions::NamedArgSeparator::DArrow,
1256 })),
1257 ],
1258 ))))
1259 }
1260 "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1261 Function::new("EDIT_DISTANCE".to_string(), f.args),
1262 ))),
1263
1264 "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1266 "FROM_HEX".to_string(),
1267 f.args,
1268 )))),
1269
1270 "PARSE_DATE"
1273 | "PARSE_DATETIME"
1274 | "PARSE_TIMESTAMP"
1275 | "SAFE.PARSE_DATE"
1276 | "SAFE.PARSE_DATETIME"
1277 | "SAFE.PARSE_TIMESTAMP" => {
1278 let args = self.normalize_time_format_args(f.args);
1279 Ok(Expression::Function(Box::new(Function {
1280 name: f.name,
1281 args,
1282 distinct: f.distinct,
1283 no_parens: f.no_parens,
1284 trailing_comments: f.trailing_comments,
1285 quoted: f.quoted,
1286 use_bracket_syntax: f.use_bracket_syntax,
1287 span: None,
1288 inferred_type: None,
1289 })))
1290 }
1291
1292 "GET_PATH" if f.args.len() == 2 => {
1294 let mut args = f.args;
1295 let this = args.remove(0);
1296 let path = args.remove(0);
1297 let json_path = match &path {
1298 Expression::Literal(Literal::String(s)) => {
1299 let normalized = if s.starts_with('$') {
1300 s.clone()
1301 } else if s.starts_with('[') {
1302 format!("${}", s)
1303 } else {
1304 format!("$.{}", s)
1305 };
1306 Expression::Literal(Literal::String(normalized))
1307 }
1308 _ => path,
1309 };
1310 Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1311 this,
1312 path: json_path,
1313 returning: None,
1314 arrow_syntax: false,
1315 hash_arrow_syntax: false,
1316 wrapper_option: None,
1317 quotes_option: None,
1318 on_scalar_string: false,
1319 on_error: None,
1320 })))
1321 }
1322
1323 _ => Ok(Expression::Function(Box::new(f))),
1325 }
1326 }
1327
1328 fn transform_aggregate_function(
1329 &self,
1330 f: Box<crate::expressions::AggregateFunction>,
1331 ) -> Result<Expression> {
1332 let name_upper = f.name.to_uppercase();
1333 match name_upper.as_str() {
1334 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1336 Function::new("STRING_AGG".to_string(), f.args),
1337 ))),
1338
1339 _ => Ok(Expression::AggregateFunction(f)),
1341 }
1342 }
1343
1344 fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1347 use crate::expressions::MethodCall;
1348
1349 if let Expression::Column(ref col) = mc.this {
1351 if col.name.name.eq_ignore_ascii_case("SAFE") {
1352 let method_upper = mc.method.name.to_uppercase();
1353 if method_upper == "PARSE_DATE"
1354 || method_upper == "PARSE_DATETIME"
1355 || method_upper == "PARSE_TIMESTAMP"
1356 {
1357 let args = self.normalize_time_format_args(mc.args);
1359 return Ok(Expression::MethodCall(Box::new(MethodCall {
1360 this: mc.this,
1361 method: mc.method,
1362 args,
1363 })));
1364 }
1365 }
1366 }
1367
1368 Ok(Expression::MethodCall(Box::new(mc)))
1370 }
1371
1372 fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1375 args.into_iter()
1376 .enumerate()
1377 .map(|(i, arg)| {
1378 if i == 0 {
1380 if let Expression::Literal(Literal::String(s)) = arg {
1381 let normalized = self.normalize_time_format(&s);
1382 return Expression::Literal(Literal::String(normalized));
1383 }
1384 }
1385 arg
1386 })
1387 .collect()
1388 }
1389
1390 fn normalize_time_format(&self, format: &str) -> String {
1394 format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1395 }
1396}
1397
1398#[cfg(test)]
1399mod tests {
1400 use super::*;
1401 use crate::dialects::Dialect;
1402 use crate::parse_one;
1403
1404 fn transpile_to_bigquery(sql: &str) -> String {
1405 let dialect = Dialect::get(DialectType::Generic);
1406 let result = dialect
1407 .transpile_to(sql, DialectType::BigQuery)
1408 .expect("Transpile failed");
1409 result[0].clone()
1410 }
1411
1412 #[test]
1413 fn test_ifnull_identity() {
1414 let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1416 assert!(
1417 result.contains("COALESCE"),
1418 "Expected COALESCE, got: {}",
1419 result
1420 );
1421 }
1422
1423 #[test]
1424 fn test_nvl_to_ifnull() {
1425 let result = transpile_to_bigquery("SELECT NVL(a, b)");
1427 assert!(
1428 result.contains("IFNULL"),
1429 "Expected IFNULL, got: {}",
1430 result
1431 );
1432 }
1433
1434 #[test]
1435 fn test_try_cast_to_safe_cast() {
1436 let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1437 assert!(
1438 result.contains("SAFE_CAST"),
1439 "Expected SAFE_CAST, got: {}",
1440 result
1441 );
1442 }
1443
1444 #[test]
1445 fn test_random_to_rand() {
1446 let result = transpile_to_bigquery("SELECT RANDOM()");
1447 assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1448 }
1449
1450 #[test]
1451 fn test_basic_select() {
1452 let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1453 assert!(result.contains("SELECT"));
1454 assert!(result.contains("FROM users"));
1455 }
1456
1457 #[test]
1458 fn test_group_concat_to_string_agg() {
1459 let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1460 assert!(
1461 result.contains("STRING_AGG"),
1462 "Expected STRING_AGG, got: {}",
1463 result
1464 );
1465 }
1466
1467 #[test]
1468 fn test_generate_series_to_generate_array() {
1469 let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1470 assert!(
1471 result.contains("GENERATE_ARRAY"),
1472 "Expected GENERATE_ARRAY, got: {}",
1473 result
1474 );
1475 }
1476
1477 #[test]
1478 fn test_backtick_identifiers() {
1479 let dialect = BigQueryDialect;
1481 let config = dialect.generator_config();
1482 assert_eq!(config.identifier_quote, '`');
1483 }
1484
1485 fn bigquery_identity(sql: &str, expected: &str) {
1486 let dialect = Dialect::get(DialectType::BigQuery);
1487 let ast = dialect.parse(sql).expect("Parse failed");
1488 let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1489 let result = dialect.generate(&transformed).expect("Generate failed");
1490 assert_eq!(result, expected, "SQL: {}", sql);
1491 }
1492
1493 #[test]
1494 fn test_cast_char_to_string() {
1495 bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1496 }
1497
1498 #[test]
1499 fn test_cast_varchar_to_string() {
1500 bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1501 }
1502
1503 #[test]
1504 fn test_cast_nchar_to_string() {
1505 bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1506 }
1507
1508 #[test]
1509 fn test_cast_nvarchar_to_string() {
1510 bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1511 }
1512
1513 #[test]
1514 fn test_cast_timestamptz_to_timestamp() {
1515 bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1516 }
1517
1518 #[test]
1519 fn test_cast_record_to_struct() {
1520 bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1521 }
1522
1523 #[test]
1524 fn test_json_literal_to_parse_json() {
1525 bigquery_identity(
1527 "SELECT JSON '\"foo\"' AS json_data",
1528 "SELECT PARSE_JSON('\"foo\"') AS json_data",
1529 );
1530 }
1531
1532 #[test]
1533 fn test_grant_as_alias_not_quoted() {
1534 bigquery_identity(
1536 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1537 "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1538 );
1539 }
1540
1541 #[test]
1542 fn test_timestamp_literal_to_cast() {
1543 bigquery_identity(
1545 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1546 "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1547 );
1548 }
1549
1550 #[test]
1551 fn test_date_literal_to_cast_in_extract() {
1552 bigquery_identity(
1554 "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1555 "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1556 );
1557 }
1558
1559 #[test]
1560 fn test_json_object_with_json_literals() {
1561 bigquery_identity(
1563 "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1564 "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1565 );
1566 }
1567
1568 #[test]
1573 fn test_safe_parse_date_format_normalization() {
1574 bigquery_identity(
1576 "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1577 "SAFE.PARSE_DATE('%F', '2024-01-15')",
1578 );
1579 }
1580
1581 #[test]
1582 fn test_safe_parse_datetime_format_normalization() {
1583 bigquery_identity(
1585 "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1586 "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1587 );
1588 }
1589
1590 #[test]
1591 fn test_safe_parse_timestamp_format_normalization() {
1592 bigquery_identity(
1594 "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1595 "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1596 );
1597 }
1598
1599 #[test]
1600 fn test_datetime_literal_to_cast() {
1601 bigquery_identity(
1603 "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1604 "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1605 );
1606 }
1607
1608 #[test]
1609 fn test_last_day_week_modifier_stripped() {
1610 bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1612 }
1613
1614 #[test]
1615 fn test_hash_line_comment_parses() {
1616 let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1619 assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1620 }
1621}