Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        // BigQuery supports # as single-line comments.
52        config.hash_comments = true;
53        config
54    }
55
56    fn generator_config(&self) -> GeneratorConfig {
57        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58        GeneratorConfig {
59            identifier_quote: '`',
60            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61            dialect: Some(DialectType::BigQuery),
62            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
63            normalize_functions: NormalizeFunctions::None,
64            // BigQuery-specific settings from Python sqlglot
65            interval_allows_plural_form: false,
66            join_hints: false,
67            query_hints: false,
68            table_hints: false,
69            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70            rename_table_with_db: false,
71            nvl2_supported: false,
72            unnest_with_ordinality: false,
73            collate_is_func: true,
74            limit_only_literals: true,
75            supports_table_alias_columns: false,
76            unpivot_aliases_are_identifiers: false,
77            json_key_value_pair_sep: ",",
78            null_ordering_supported: false,
79            ignore_nulls_in_func: true,
80            json_path_single_quote_escape: true,
81            can_implement_array_any: true,
82            supports_to_number: false,
83            named_placeholder_token: "@",
84            hex_func: "TO_HEX",
85            with_properties_prefix: "OPTIONS",
86            supports_exploding_projections: false,
87            except_intersect_support_all_clause: false,
88            supports_unix_seconds: true,
89            // BigQuery uses SAFE_ prefix for safe operations
90            try_supported: true,
91            // BigQuery does not support SEMI/ANTI JOIN syntax
92            semi_anti_join_with_side: false,
93            ..Default::default()
94        }
95    }
96
97    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98        match expr {
99            // ===== Data Type Mappings =====
100            Expression::DataType(dt) => self.transform_data_type(dt),
101
102            // ===== Null handling =====
103            // IFNULL is native to BigQuery - keep as-is for identity
104            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
107            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109            // Coalesce stays as Coalesce
110            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112            // ===== String aggregation =====
113            // GROUP_CONCAT -> STRING_AGG in BigQuery
114            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115                this: f.this,
116                separator: f.separator,
117                order_by: f.order_by,
118                distinct: f.distinct,
119                filter: f.filter,
120                limit: None,
121            }))),
122
123            // ===== Cast operations =====
124            // Cast data types are transformed by transform_recursive in mod.rs
125            // which calls transform_data_type via transform_expr(Expression::DataType(...))
126
127            // TryCast -> SafeCast in BigQuery with type transformation
128            Expression::TryCast(c) => {
129                let transformed_type = match self.transform_data_type(c.to)? {
130                    Expression::DataType(dt) => dt,
131                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
132                };
133                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
134                    this: c.this,
135                    to: transformed_type,
136                    trailing_comments: c.trailing_comments,
137                    double_colon_syntax: c.double_colon_syntax,
138                    format: c.format,
139                    default: c.default,
140                    inferred_type: None,
141                })))
142            }
143
144            // ===== Pattern matching =====
145            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
146            Expression::ILike(op) => {
147                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
148                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
149                Ok(Expression::Like(Box::new(LikeOp {
150                    left: lower_left,
151                    right: lower_right,
152                    escape: op.escape,
153                    quantifier: op.quantifier,
154                    inferred_type: None,
155                })))
156            }
157
158            // RegexpLike -> REGEXP_CONTAINS in BigQuery
159            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
160                "REGEXP_CONTAINS".to_string(),
161                vec![f.this, f.pattern],
162            )))),
163
164            // ===== Array operations =====
165            // EXPLODE -> UNNEST in BigQuery
166            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
167                crate::expressions::UnnestFunc {
168                    this: f.this,
169                    expressions: Vec::new(),
170                    with_ordinality: false,
171                    alias: None,
172                    offset_alias: None,
173                },
174            ))),
175
176            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
177            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
178                crate::expressions::UnnestFunc {
179                    this: f.this,
180                    expressions: Vec::new(),
181                    with_ordinality: false,
182                    alias: None,
183                    offset_alias: None,
184                },
185            ))),
186
187            // GenerateSeries -> GENERATE_ARRAY in BigQuery
188            Expression::GenerateSeries(f) => {
189                let mut args = Vec::new();
190                if let Some(start) = f.start {
191                    args.push(*start);
192                }
193                if let Some(end) = f.end {
194                    args.push(*end);
195                }
196                if let Some(step) = f.step {
197                    args.push(*step);
198                }
199                Ok(Expression::Function(Box::new(Function::new(
200                    "GENERATE_ARRAY".to_string(),
201                    args,
202                ))))
203            }
204
205            // ===== Bitwise operations =====
206            // BitwiseAndAgg -> BIT_AND
207            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
208                "BIT_AND".to_string(),
209                vec![f.this],
210            )))),
211
212            // BitwiseOrAgg -> BIT_OR
213            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
214                "BIT_OR".to_string(),
215                vec![f.this],
216            )))),
217
218            // BitwiseXorAgg -> BIT_XOR
219            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
220                "BIT_XOR".to_string(),
221                vec![f.this],
222            )))),
223
224            // BitwiseCount -> BIT_COUNT
225            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
226                "BIT_COUNT".to_string(),
227                vec![f.this],
228            )))),
229
230            // ByteLength -> BYTE_LENGTH
231            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
232                "BYTE_LENGTH".to_string(),
233                vec![f.this],
234            )))),
235
236            // IntDiv -> DIV
237            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
238                "DIV".to_string(),
239                vec![f.this, f.expression],
240            )))),
241
242            // Int64 -> INT64
243            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
244                "INT64".to_string(),
245                vec![f.this],
246            )))),
247
248            // ===== Random =====
249            // RANDOM -> RAND in BigQuery
250            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
251                seed: None,
252                lower: None,
253                upper: None,
254            }))),
255
256            // ===== UUID =====
257            // Uuid -> GENERATE_UUID in BigQuery
258            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
259                "GENERATE_UUID".to_string(),
260                vec![],
261            )))),
262
263            // ===== Approximate functions =====
264            // ApproxDistinct -> APPROX_COUNT_DISTINCT
265            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
266                "APPROX_COUNT_DISTINCT".to_string(),
267                vec![f.this],
268            )))),
269
270            // ArgMax -> MAX_BY in BigQuery
271            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
272                "MAX_BY".to_string(),
273                vec![*f.this, *f.expression],
274            )))),
275
276            // ArgMin -> MIN_BY in BigQuery
277            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
278                "MIN_BY".to_string(),
279                vec![*f.this, *f.expression],
280            )))),
281
282            // ===== Conditional =====
283            // CountIf -> COUNTIF in BigQuery
284            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
285                "COUNTIF".to_string(),
286                vec![f.this],
287            )))),
288
289            // ===== String functions =====
290            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
291            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
292
293            // ===== Conversion =====
294            // Unhex -> FROM_HEX
295            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
296                "FROM_HEX".to_string(),
297                vec![*f.this],
298            )))),
299
300            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
301            Expression::UnixToTime(f) => {
302                let scale = f.scale.unwrap_or(0);
303                match scale {
304                    0 => Ok(Expression::Function(Box::new(Function::new(
305                        "TIMESTAMP_SECONDS".to_string(),
306                        vec![*f.this],
307                    )))),
308                    3 => Ok(Expression::Function(Box::new(Function::new(
309                        "TIMESTAMP_MILLIS".to_string(),
310                        vec![*f.this],
311                    )))),
312                    6 => Ok(Expression::Function(Box::new(Function::new(
313                        "TIMESTAMP_MICROS".to_string(),
314                        vec![*f.this],
315                    )))),
316                    _ => {
317                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
318                        let div_expr =
319                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
320                                *f.this,
321                                Expression::Function(Box::new(Function::new(
322                                    "POWER".to_string(),
323                                    vec![Expression::number(10), Expression::number(scale)],
324                                ))),
325                            )));
326                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
327                            this: div_expr,
328                            to: crate::expressions::DataType::Custom {
329                                name: "INT64".to_string(),
330                            },
331                            double_colon_syntax: false,
332                            trailing_comments: vec![],
333                            format: None,
334                            default: None,
335                            inferred_type: None,
336                        }));
337                        Ok(Expression::Function(Box::new(Function::new(
338                            "TIMESTAMP_SECONDS".to_string(),
339                            vec![cast_expr],
340                        ))))
341                    }
342                }
343            }
344
345            // ===== Date/time =====
346            // DateDiff -> DATE_DIFF in BigQuery
347            Expression::DateDiff(f) => {
348                // BigQuery: DATE_DIFF(date1, date2, part)
349                let unit_str = match f.unit {
350                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
351                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
352                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
353                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
354                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
355                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
356                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
357                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
358                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
359                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
360                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
361                    None => "DAY",
362                };
363                let unit = Expression::Identifier(crate::expressions::Identifier {
364                    name: unit_str.to_string(),
365                    quoted: false,
366                    trailing_comments: Vec::new(),
367                    span: None,
368                });
369                Ok(Expression::Function(Box::new(Function::new(
370                    "DATE_DIFF".to_string(),
371                    vec![f.this, f.expression, unit],
372                ))))
373            }
374
375            // ===== Variance =====
376            // VarPop -> VAR_POP
377            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
378                "VAR_POP".to_string(),
379                vec![f.this],
380            )))),
381
382            // ===== Hash functions =====
383            // SHA -> SHA1
384            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
385                "SHA1".to_string(),
386                vec![f.this],
387            )))),
388
389            // SHA1Digest -> SHA1
390            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
391                "SHA1".to_string(),
392                vec![f.this],
393            )))),
394
395            // MD5Digest -> MD5
396            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
397                "MD5".to_string(),
398                vec![*f.this],
399            )))),
400
401            // ===== Type conversion =====
402            // JSONBool -> BOOL
403            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
404                "BOOL".to_string(),
405                vec![f.this],
406            )))),
407
408            // StringFunc -> STRING
409            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
410                "STRING".to_string(),
411                vec![*f.this],
412            )))),
413
414            // ===== Date/time from parts =====
415            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
416            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
417                "DATE_FROM_UNIX_DATE".to_string(),
418                vec![f.this],
419            )))),
420
421            // UnixDate -> UNIX_DATE
422            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
423                "UNIX_DATE".to_string(),
424                vec![f.this],
425            )))),
426
427            // TimestampDiff -> TIMESTAMP_DIFF
428            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
429                "TIMESTAMP_DIFF".to_string(),
430                vec![*f.this, *f.expression],
431            )))),
432
433            // FromTimeZone -> DATETIME
434            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
435                "DATETIME".to_string(),
436                vec![*f.this],
437            )))),
438
439            // TsOrDsToDatetime -> DATETIME
440            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
441                "DATETIME".to_string(),
442                vec![f.this],
443            )))),
444
445            // TsOrDsToTimestamp -> TIMESTAMP
446            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
447                "TIMESTAMP".to_string(),
448                vec![f.this],
449            )))),
450
451            // ===== IfFunc -> IF in BigQuery =====
452            Expression::IfFunc(f) => {
453                let mut args = vec![f.condition, f.true_value];
454                if let Some(false_val) = f.false_value {
455                    args.push(false_val);
456                } else {
457                    args.push(Expression::Null(crate::expressions::Null));
458                }
459                Ok(Expression::Function(Box::new(Function::new(
460                    "IF".to_string(),
461                    args,
462                ))))
463            }
464
465            // ===== HexString -> FROM_HEX =====
466            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
467                "FROM_HEX".to_string(),
468                vec![*f.this],
469            )))),
470
471            // ===== Additional auto-generated transforms from Python sqlglot =====
472            // ApproxTopK -> APPROX_TOP_COUNT
473            Expression::ApproxTopK(f) => {
474                let mut args = vec![*f.this];
475                if let Some(expr) = f.expression {
476                    args.push(*expr);
477                }
478                Ok(Expression::Function(Box::new(Function::new(
479                    "APPROX_TOP_COUNT".to_string(),
480                    args,
481                ))))
482            }
483
484            // SafeDivide -> SAFE_DIVIDE
485            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
486                "SAFE_DIVIDE".to_string(),
487                vec![*f.this, *f.expression],
488            )))),
489
490            // JSONKeysAtDepth -> JSON_KEYS
491            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
492                "JSON_KEYS".to_string(),
493                vec![*f.this],
494            )))),
495
496            // JSONValueArray -> JSON_VALUE_ARRAY
497            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
498                "JSON_VALUE_ARRAY".to_string(),
499                vec![*f.this],
500            )))),
501
502            // DateFromParts -> DATE
503            Expression::DateFromParts(f) => {
504                let mut args = Vec::new();
505                if let Some(y) = f.year {
506                    args.push(*y);
507                }
508                if let Some(m) = f.month {
509                    args.push(*m);
510                }
511                if let Some(d) = f.day {
512                    args.push(*d);
513                }
514                Ok(Expression::Function(Box::new(Function::new(
515                    "DATE".to_string(),
516                    args,
517                ))))
518            }
519
520            // SPLIT: BigQuery defaults to comma separator when none provided
521            // SPLIT(foo) -> SPLIT(foo, ',')
522            Expression::Split(f) => {
523                // Check if delimiter is empty or a placeholder - add default comma
524                let delimiter = match &f.delimiter {
525                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
526                        Expression::Literal(Literal::String(",".to_string()))
527                    }
528                    _ => f.delimiter,
529                };
530                Ok(Expression::Split(Box::new(SplitFunc {
531                    this: f.this,
532                    delimiter,
533                })))
534            }
535
536            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
537            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
538            Expression::Cast(c) => {
539                use crate::expressions::DataType;
540                // Check if casting to JSON - use PARSE_JSON instead
541                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
542                // (parser creates Custom for type literals like JSON 'string')
543                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
544                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
545                if is_json {
546                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
547                }
548                let transformed_type = match self.transform_data_type(c.to)? {
549                    Expression::DataType(dt) => dt,
550                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
551                };
552                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
553                    this: c.this,
554                    to: transformed_type,
555                    trailing_comments: c.trailing_comments,
556                    double_colon_syntax: c.double_colon_syntax,
557                    format: c.format,
558                    default: c.default,
559                    inferred_type: None,
560                })))
561            }
562
563            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
564            Expression::SafeCast(c) => {
565                let transformed_type = match self.transform_data_type(c.to)? {
566                    Expression::DataType(dt) => dt,
567                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
568                };
569                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
570                    this: c.this,
571                    to: transformed_type,
572                    trailing_comments: c.trailing_comments,
573                    double_colon_syntax: c.double_colon_syntax,
574                    format: c.format,
575                    default: c.default,
576                    inferred_type: None,
577                })))
578            }
579
580            // ===== SELECT-level transforms =====
581            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
582            Expression::Select(mut select) => {
583                if select.group_by.is_some() && select.order_by.is_some() {
584                    // Build map: expression → alias name for aliased projections
585                    let aliases: Vec<(Expression, Identifier)> = select
586                        .expressions
587                        .iter()
588                        .filter_map(|e| {
589                            if let Expression::Alias(a) = e {
590                                Some((a.this.clone(), a.alias.clone()))
591                            } else {
592                                None
593                            }
594                        })
595                        .collect();
596
597                    if let Some(ref mut group_by) = select.group_by {
598                        for grouped in group_by.expressions.iter_mut() {
599                            // Skip numeric indices (already aliased)
600                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
601                                continue;
602                            }
603                            // Check if this GROUP BY expression matches a SELECT alias
604                            for (expr, alias_ident) in &aliases {
605                                if grouped == expr {
606                                    *grouped = Expression::Column(Column {
607                                        name: alias_ident.clone(),
608                                        table: None,
609                                        join_mark: false,
610                                        trailing_comments: Vec::new(),
611                                        span: None,
612                                        inferred_type: None,
613                                    });
614                                    break;
615                                }
616                            }
617                        }
618                    }
619                }
620                Ok(Expression::Select(select))
621            }
622
623            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
624            Expression::ArrayContains(f) => {
625                let array_expr = f.this;
626                let value_expr = f.expression;
627
628                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
629                let unnest = Expression::Unnest(Box::new(UnnestFunc {
630                    this: array_expr,
631                    expressions: Vec::new(),
632                    with_ordinality: false,
633                    alias: None,
634                    offset_alias: None,
635                }));
636                let aliased_unnest = Expression::Alias(Box::new(Alias {
637                    this: unnest,
638                    alias: Identifier::new("_col"),
639                    column_aliases: Vec::new(),
640                    pre_alias_comments: Vec::new(),
641                    trailing_comments: Vec::new(),
642                    inferred_type: None,
643                }));
644                let col_ref = Expression::Column(Column {
645                    name: Identifier::new("_col"),
646                    table: None,
647                    join_mark: false,
648                    trailing_comments: Vec::new(),
649                    span: None,
650                    inferred_type: None,
651                });
652                let where_clause = Where {
653                    this: Expression::Eq(Box::new(BinaryOp {
654                        left: col_ref,
655                        right: value_expr,
656                        left_comments: Vec::new(),
657                        operator_comments: Vec::new(),
658                        trailing_comments: Vec::new(),
659                        inferred_type: None,
660                    })),
661                };
662                let inner_select = Expression::Select(Box::new(Select {
663                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
664                    from: Some(From {
665                        expressions: vec![aliased_unnest],
666                    }),
667                    where_clause: Some(where_clause),
668                    ..Default::default()
669                }));
670                Ok(Expression::Exists(Box::new(Exists {
671                    this: inner_select,
672                    not: false,
673                })))
674            }
675
676            // ===== JSON_OBJECT array form → key-value pairs =====
677            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
678            Expression::JsonObject(mut f) => {
679                if f.pairs.len() == 1 {
680                    // Extract expressions from both Array and ArrayFunc variants
681                    let keys_exprs = match &f.pairs[0].0 {
682                        Expression::Array(arr) => Some(&arr.expressions),
683                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
684                        _ => None,
685                    };
686                    let vals_exprs = match &f.pairs[0].1 {
687                        Expression::Array(arr) => Some(&arr.expressions),
688                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
689                        _ => None,
690                    };
691                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
692                        if keys.len() == vals.len() {
693                            let new_pairs: Vec<(Expression, Expression)> = keys
694                                .iter()
695                                .zip(vals.iter())
696                                .map(|(k, v)| (k.clone(), v.clone()))
697                                .collect();
698                            f.pairs = new_pairs;
699                        }
700                    }
701                }
702                Ok(Expression::JsonObject(f))
703            }
704
705            // ===== MOD function: unwrap unnecessary Paren from first argument =====
706            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
707            Expression::ModFunc(mut f) => {
708                // Unwrap Paren from first argument if present
709                if let Expression::Paren(paren) = f.this {
710                    f.this = paren.this;
711                }
712                Ok(Expression::ModFunc(f))
713            }
714
715            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
716            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
717                let path = match *e.expression {
718                    Expression::Literal(Literal::String(s)) => {
719                        let normalized = if s.starts_with('$') {
720                            s
721                        } else if s.starts_with('[') {
722                            format!("${}", s)
723                        } else {
724                            format!("$.{}", s)
725                        };
726                        Expression::Literal(Literal::String(normalized))
727                    }
728                    other => other,
729                };
730                Ok(Expression::Function(Box::new(Function::new(
731                    "JSON_EXTRACT".to_string(),
732                    vec![*e.this, path],
733                ))))
734            }
735
736            // Generic function transformations
737            Expression::Function(f) => self.transform_function(*f),
738
739            // Generic aggregate function transformations
740            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
741
742            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
743            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
744            Expression::MethodCall(mc) => self.transform_method_call(*mc),
745
746            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
747            // and set is_table_function = true for TABLE FUNCTION syntax
748            Expression::CreateFunction(mut cf) => {
749                if let Some(ref mut rtb) = cf.returns_table_body {
750                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
751                        // Convert TABLE (...) to TABLE <...> with BigQuery types
752                        let inner = if rtb.starts_with("TABLE (") {
753                            &rtb["TABLE (".len()..rtb.len() - 1]
754                        } else {
755                            &rtb["TABLE(".len()..rtb.len() - 1]
756                        };
757                        // Convert common types to BigQuery equivalents
758                        let converted = inner
759                            .replace(" INT,", " INT64,")
760                            .replace(" INT)", " INT64)")
761                            .replace(" INTEGER,", " INT64,")
762                            .replace(" INTEGER)", " INT64)")
763                            .replace(" FLOAT,", " FLOAT64,")
764                            .replace(" FLOAT)", " FLOAT64)")
765                            .replace(" BOOLEAN,", " BOOL,")
766                            .replace(" BOOLEAN)", " BOOL)")
767                            .replace(" VARCHAR", " STRING")
768                            .replace(" TEXT", " STRING");
769                        // Handle trailing type (no comma, no paren)
770                        let converted = if converted.ends_with(" INT") {
771                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
772                        } else {
773                            converted
774                        };
775                        *rtb = format!("TABLE <{}>", converted);
776                        cf.is_table_function = true;
777                    }
778                }
779                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
780                if cf.is_table_function {
781                    if let Some(ref body) = cf.body {
782                        if matches!(body, FunctionBody::StringLiteral(_)) {
783                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
784                                // Parse the SQL string into an expression
785                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
786                                    if let Some(stmt) = parsed.into_iter().next() {
787                                        cf.body = Some(FunctionBody::Expression(stmt));
788                                    } else {
789                                        cf.body = Some(FunctionBody::StringLiteral(sql));
790                                    }
791                                } else {
792                                    cf.body = Some(FunctionBody::StringLiteral(sql));
793                                }
794                            }
795                        }
796                    }
797                }
798                Ok(Expression::CreateFunction(cf))
799            }
800
801            // Pass through everything else
802            _ => Ok(expr),
803        }
804    }
805}
806
807impl BigQueryDialect {
808    /// Transform data types according to BigQuery TYPE_MAPPING
809    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
810        use crate::expressions::DataType;
811        let transformed = match dt {
812            // BIGINT -> INT64
813            DataType::BigInt { .. } => DataType::Custom {
814                name: "INT64".to_string(),
815            },
816            // INT -> INT64
817            DataType::Int { .. } => DataType::Custom {
818                name: "INT64".to_string(),
819            },
820            // SMALLINT -> INT64
821            DataType::SmallInt { .. } => DataType::Custom {
822                name: "INT64".to_string(),
823            },
824            // TINYINT -> INT64
825            DataType::TinyInt { .. } => DataType::Custom {
826                name: "INT64".to_string(),
827            },
828            // FLOAT -> FLOAT64
829            DataType::Float { .. } => DataType::Custom {
830                name: "FLOAT64".to_string(),
831            },
832            // DOUBLE -> FLOAT64
833            DataType::Double { .. } => DataType::Custom {
834                name: "FLOAT64".to_string(),
835            },
836            // BOOLEAN -> BOOL
837            DataType::Boolean => DataType::Custom {
838                name: "BOOL".to_string(),
839            },
840            // CHAR -> STRING
841            DataType::Char { .. } => DataType::Custom {
842                name: "STRING".to_string(),
843            },
844            // VARCHAR -> STRING
845            DataType::VarChar { .. } => DataType::Custom {
846                name: "STRING".to_string(),
847            },
848            // TEXT -> STRING
849            DataType::Text => DataType::Custom {
850                name: "STRING".to_string(),
851            },
852            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
853            DataType::String { .. } => DataType::Custom {
854                name: "STRING".to_string(),
855            },
856            // BINARY -> BYTES
857            DataType::Binary { .. } => DataType::Custom {
858                name: "BYTES".to_string(),
859            },
860            // VARBINARY -> BYTES
861            DataType::VarBinary { .. } => DataType::Custom {
862                name: "BYTES".to_string(),
863            },
864            // BLOB -> BYTES
865            DataType::Blob => DataType::Custom {
866                name: "BYTES".to_string(),
867            },
868            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
869            DataType::Decimal { .. } => DataType::Custom {
870                name: "NUMERIC".to_string(),
871            },
872            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
873            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
874            // be converted to DATETIME by the generator
875            DataType::Timestamp {
876                timezone: false, ..
877            } => DataType::Custom {
878                name: "TIMESTAMP".to_string(),
879            },
880            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
881                name: "TIMESTAMP".to_string(),
882            },
883            // UUID -> STRING (BigQuery doesn't have native UUID type)
884            DataType::Uuid => DataType::Custom {
885                name: "STRING".to_string(),
886            },
887            // RECORD -> STRUCT in BigQuery
888            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
889                DataType::Custom {
890                    name: "STRUCT".to_string(),
891                }
892            }
893            // TIMESTAMPTZ (custom) -> TIMESTAMP
894            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
895                DataType::Custom {
896                    name: "TIMESTAMP".to_string(),
897                }
898            }
899            // BYTEINT (custom) -> INT64
900            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
901                DataType::Custom {
902                    name: "INT64".to_string(),
903                }
904            }
905            // Keep all other types as-is
906            other => other,
907        };
908        Ok(Expression::DataType(transformed))
909    }
910
911    fn transform_function(&self, f: Function) -> Result<Expression> {
912        let name_upper = f.name.to_uppercase();
913        match name_upper.as_str() {
914            // IFNULL -> COALESCE (both work in BigQuery)
915            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
916                original_name: None,
917                expressions: f.args,
918                inferred_type: None,
919            }))),
920
921            // NVL -> COALESCE
922            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
923                original_name: None,
924                expressions: f.args,
925                inferred_type: None,
926            }))),
927
928            // ISNULL -> COALESCE
929            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
930                original_name: None,
931                expressions: f.args,
932                inferred_type: None,
933            }))),
934
935            // GROUP_CONCAT -> STRING_AGG in BigQuery
936            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
937                Function::new("STRING_AGG".to_string(), f.args),
938            ))),
939
940            // SUBSTR -> SUBSTRING (both work)
941            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
942                "SUBSTRING".to_string(),
943                f.args,
944            )))),
945
946            // RANDOM -> RAND
947            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
948                seed: None,
949                lower: None,
950                upper: None,
951            }))),
952
953            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
954            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
955            "CURRENT_DATE" if f.args.is_empty() => {
956                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
957            }
958            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
959                name: "CURRENT_DATE".to_string(),
960                args: f.args,
961                distinct: false,
962                trailing_comments: Vec::new(),
963                use_bracket_syntax: false,
964                no_parens: false,
965                quoted: false,
966                span: None,
967                inferred_type: None,
968            }))),
969
970            // NOW -> CURRENT_TIMESTAMP in BigQuery
971            "NOW" => Ok(Expression::CurrentTimestamp(
972                crate::expressions::CurrentTimestamp {
973                    precision: None,
974                    sysdate: false,
975                },
976            )),
977
978            // TO_DATE -> PARSE_DATE in BigQuery
979            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
980                "PARSE_DATE".to_string(),
981                f.args,
982            )))),
983
984            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
985            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
986                "PARSE_TIMESTAMP".to_string(),
987                f.args,
988            )))),
989
990            // TO_TIME -> TIME in BigQuery
991            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
992                "TIME".to_string(),
993                f.args,
994            )))),
995
996            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
997            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
998                "FORMAT_DATE".to_string(),
999                f.args,
1000            )))),
1001
1002            // POSITION -> STRPOS in BigQuery
1003            // BigQuery uses STRPOS(string, substring)
1004            "POSITION" if f.args.len() == 2 => {
1005                let mut args = f.args;
1006                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1007                let first = args.remove(0);
1008                let second = args.remove(0);
1009                Ok(Expression::Function(Box::new(Function::new(
1010                    "STRPOS".to_string(),
1011                    vec![second, first],
1012                ))))
1013            }
1014
1015            // LEN -> LENGTH
1016            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1017                f.args.into_iter().next().unwrap(),
1018            )))),
1019
1020            // CEILING -> CEIL (both work)
1021            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1022                this: f.args.into_iter().next().unwrap(),
1023                decimals: None,
1024                to: None,
1025            }))),
1026
1027            // GETDATE -> CURRENT_TIMESTAMP
1028            "GETDATE" => Ok(Expression::CurrentTimestamp(
1029                crate::expressions::CurrentTimestamp {
1030                    precision: None,
1031                    sysdate: false,
1032                },
1033            )),
1034
1035            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1036            // CARDINALITY -> ARRAY_LENGTH
1037            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1038                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1039            ))),
1040
1041            // UNNEST is native to BigQuery
1042
1043            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1044            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1045                "GENERATE_ARRAY".to_string(),
1046                f.args,
1047            )))),
1048
1049            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1050            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1051            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1052                "APPROX_COUNT_DISTINCT".to_string(),
1053                f.args,
1054            )))),
1055
1056            // COUNT_IF -> COUNTIF in BigQuery
1057            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1058                "COUNTIF".to_string(),
1059                f.args,
1060            )))),
1061
1062            // SHA1 -> SHA1 (native), SHA -> SHA1
1063            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1064                "SHA1".to_string(),
1065                f.args,
1066            )))),
1067
1068            // SHA256/SHA2 -> SHA256
1069            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1070                "SHA256".to_string(),
1071                f.args,
1072            )))),
1073
1074            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1075            // TO_HEX(MD5(x)) pattern
1076            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1077                "MD5".to_string(),
1078                f.args,
1079            )))),
1080
1081            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1082            // VAR_POP -> VAR_POP (native)
1083
1084            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1085            "DATEADD" if f.args.len() == 3 => {
1086                let mut args = f.args;
1087                let unit_expr = args.remove(0);
1088                let amount = args.remove(0);
1089                let date = args.remove(0);
1090                // Convert unit identifier to IntervalUnit
1091                let unit_name = match &unit_expr {
1092                    Expression::Identifier(id) => id.name.to_uppercase(),
1093                    _ => "DAY".to_string(),
1094                };
1095                let unit = match unit_name.as_str() {
1096                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1097                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1098                        crate::expressions::IntervalUnit::Quarter
1099                    }
1100                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1101                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1102                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1103                        crate::expressions::IntervalUnit::Day
1104                    }
1105                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1106                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1107                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1108                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1109                        crate::expressions::IntervalUnit::Millisecond
1110                    }
1111                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1112                        crate::expressions::IntervalUnit::Microsecond
1113                    }
1114                    _ => crate::expressions::IntervalUnit::Day,
1115                };
1116                Ok(Expression::DateAdd(Box::new(
1117                    crate::expressions::DateAddFunc {
1118                        this: date,
1119                        interval: amount,
1120                        unit,
1121                    },
1122                )))
1123            }
1124            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1125                "DATE_ADD".to_string(),
1126                f.args,
1127            )))),
1128
1129            // DATE_DIFF in BigQuery (native)
1130            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1131                "DATE_DIFF".to_string(),
1132                f.args,
1133            )))),
1134
1135            // TIMESTAMP_DIFF in BigQuery
1136            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1137                "TIMESTAMP_DIFF".to_string(),
1138                f.args,
1139            )))),
1140
1141            // TIME -> TIME (native)
1142            // DATETIME -> DATETIME (native)
1143
1144            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1145
1146            // NEWID/UUID -> GENERATE_UUID
1147            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1148                "GENERATE_UUID".to_string(),
1149                vec![],
1150            )))),
1151
1152            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1153            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1154                "EDIT_DISTANCE".to_string(),
1155                f.args,
1156            )))),
1157
1158            // UNIX_TIMESTAMP -> UNIX_SECONDS
1159            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1160                "UNIX_SECONDS".to_string(),
1161                f.args,
1162            )))),
1163
1164            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1165            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1166                "TIMESTAMP_SECONDS".to_string(),
1167                f.args,
1168            )))),
1169
1170            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1171            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1172                Function::new("LENGTH".to_string(), f.args),
1173            ))),
1174
1175            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1176            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1177                "BYTE_LENGTH".to_string(),
1178                f.args,
1179            )))),
1180
1181            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1182            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1183                "JSON_VALUE_ARRAY".to_string(),
1184                f.args,
1185            )))),
1186
1187            // INSTR is native to BigQuery
1188
1189            // SPLIT: BigQuery defaults to comma separator when none provided
1190            // SPLIT(foo) -> SPLIT(foo, ',')
1191            "SPLIT" if f.args.len() == 1 => {
1192                let mut args = f.args;
1193                args.push(Expression::Literal(Literal::String(",".to_string())));
1194                Ok(Expression::Split(Box::new(SplitFunc {
1195                    this: args.remove(0),
1196                    delimiter: args.remove(0),
1197                })))
1198            }
1199
1200            // SPLIT with two args - convert to Split expression
1201            "SPLIT" if f.args.len() == 2 => {
1202                let mut args = f.args;
1203                Ok(Expression::Split(Box::new(SplitFunc {
1204                    this: args.remove(0),
1205                    delimiter: args.remove(0),
1206                })))
1207            }
1208
1209            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1210            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1211                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1212                let args = if f.args.len() > 4 {
1213                    f.args[..4].to_vec()
1214                } else {
1215                    f.args
1216                };
1217                Ok(Expression::Function(Box::new(Function::new(
1218                    "REGEXP_EXTRACT".to_string(),
1219                    args,
1220                ))))
1221            }
1222            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1223                "REGEXP_EXTRACT".to_string(),
1224                f.args,
1225            )))),
1226
1227            // REGEXP_REPLACE - strip extra Snowflake-specific args
1228            "REGEXP_REPLACE" if f.args.len() > 3 => {
1229                let args = f.args[..3].to_vec();
1230                Ok(Expression::Function(Box::new(Function::new(
1231                    "REGEXP_REPLACE".to_string(),
1232                    args,
1233                ))))
1234            }
1235
1236            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1237            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1238                "JSON_OBJECT".to_string(),
1239                f.args,
1240            )))),
1241
1242            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1243            "EDITDISTANCE" if f.args.len() == 3 => {
1244                let col1 = f.args[0].clone();
1245                let col2 = f.args[1].clone();
1246                let max_dist = f.args[2].clone();
1247                Ok(Expression::Function(Box::new(Function::new(
1248                    "EDIT_DISTANCE".to_string(),
1249                    vec![
1250                        col1,
1251                        col2,
1252                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1253                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1254                            value: max_dist,
1255                            separator: crate::expressions::NamedArgSeparator::DArrow,
1256                        })),
1257                    ],
1258                ))))
1259            }
1260            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1261                Function::new("EDIT_DISTANCE".to_string(), f.args),
1262            ))),
1263
1264            // HEX_DECODE_BINARY -> FROM_HEX
1265            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1266                "FROM_HEX".to_string(),
1267                f.args,
1268            )))),
1269
1270            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1271            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1272            "PARSE_DATE"
1273            | "PARSE_DATETIME"
1274            | "PARSE_TIMESTAMP"
1275            | "SAFE.PARSE_DATE"
1276            | "SAFE.PARSE_DATETIME"
1277            | "SAFE.PARSE_TIMESTAMP" => {
1278                let args = self.normalize_time_format_args(f.args);
1279                Ok(Expression::Function(Box::new(Function {
1280                    name: f.name,
1281                    args,
1282                    distinct: f.distinct,
1283                    no_parens: f.no_parens,
1284                    trailing_comments: f.trailing_comments,
1285                    quoted: f.quoted,
1286                    use_bracket_syntax: f.use_bracket_syntax,
1287                    span: None,
1288                    inferred_type: None,
1289                })))
1290            }
1291
1292            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1293            "GET_PATH" if f.args.len() == 2 => {
1294                let mut args = f.args;
1295                let this = args.remove(0);
1296                let path = args.remove(0);
1297                let json_path = match &path {
1298                    Expression::Literal(Literal::String(s)) => {
1299                        let normalized = if s.starts_with('$') {
1300                            s.clone()
1301                        } else if s.starts_with('[') {
1302                            format!("${}", s)
1303                        } else {
1304                            format!("$.{}", s)
1305                        };
1306                        Expression::Literal(Literal::String(normalized))
1307                    }
1308                    _ => path,
1309                };
1310                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1311                    this,
1312                    path: json_path,
1313                    returning: None,
1314                    arrow_syntax: false,
1315                    hash_arrow_syntax: false,
1316                    wrapper_option: None,
1317                    quotes_option: None,
1318                    on_scalar_string: false,
1319                    on_error: None,
1320                })))
1321            }
1322
1323            // Pass through everything else
1324            _ => Ok(Expression::Function(Box::new(f))),
1325        }
1326    }
1327
1328    fn transform_aggregate_function(
1329        &self,
1330        f: Box<crate::expressions::AggregateFunction>,
1331    ) -> Result<Expression> {
1332        let name_upper = f.name.to_uppercase();
1333        match name_upper.as_str() {
1334            // GROUP_CONCAT -> STRING_AGG
1335            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1336                Function::new("STRING_AGG".to_string(), f.args),
1337            ))),
1338
1339            // Pass through everything else
1340            _ => Ok(Expression::AggregateFunction(f)),
1341        }
1342    }
1343
1344    /// Transform MethodCall expressions
1345    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1346    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1347        use crate::expressions::MethodCall;
1348
1349        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1350        if let Expression::Column(ref col) = mc.this {
1351            if col.name.name.eq_ignore_ascii_case("SAFE") {
1352                let method_upper = mc.method.name.to_uppercase();
1353                if method_upper == "PARSE_DATE"
1354                    || method_upper == "PARSE_DATETIME"
1355                    || method_upper == "PARSE_TIMESTAMP"
1356                {
1357                    // Normalize the format string in the first argument
1358                    let args = self.normalize_time_format_args(mc.args);
1359                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1360                        this: mc.this,
1361                        method: mc.method,
1362                        args,
1363                    })));
1364                }
1365            }
1366        }
1367
1368        // Pass through all other method calls
1369        Ok(Expression::MethodCall(Box::new(mc)))
1370    }
1371
1372    /// Normalize time format strings in function arguments
1373    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1374    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1375        args.into_iter()
1376            .enumerate()
1377            .map(|(i, arg)| {
1378                // Only transform the first argument (the format string)
1379                if i == 0 {
1380                    if let Expression::Literal(Literal::String(s)) = arg {
1381                        let normalized = self.normalize_time_format(&s);
1382                        return Expression::Literal(Literal::String(normalized));
1383                    }
1384                }
1385                arg
1386            })
1387            .collect()
1388    }
1389
1390    /// Normalize a time format string according to BigQuery conventions
1391    /// %Y-%m-%d -> %F (ISO date)
1392    /// %H:%M:%S -> %T (time)
1393    fn normalize_time_format(&self, format: &str) -> String {
1394        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1395    }
1396}
1397
1398#[cfg(test)]
1399mod tests {
1400    use super::*;
1401    use crate::dialects::Dialect;
1402    use crate::parse_one;
1403
1404    fn transpile_to_bigquery(sql: &str) -> String {
1405        let dialect = Dialect::get(DialectType::Generic);
1406        let result = dialect
1407            .transpile_to(sql, DialectType::BigQuery)
1408            .expect("Transpile failed");
1409        result[0].clone()
1410    }
1411
1412    #[test]
1413    fn test_ifnull_identity() {
1414        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1415        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1416        assert!(
1417            result.contains("COALESCE"),
1418            "Expected COALESCE, got: {}",
1419            result
1420        );
1421    }
1422
1423    #[test]
1424    fn test_nvl_to_ifnull() {
1425        // NVL is converted to IFNULL in BigQuery
1426        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1427        assert!(
1428            result.contains("IFNULL"),
1429            "Expected IFNULL, got: {}",
1430            result
1431        );
1432    }
1433
1434    #[test]
1435    fn test_try_cast_to_safe_cast() {
1436        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1437        assert!(
1438            result.contains("SAFE_CAST"),
1439            "Expected SAFE_CAST, got: {}",
1440            result
1441        );
1442    }
1443
1444    #[test]
1445    fn test_random_to_rand() {
1446        let result = transpile_to_bigquery("SELECT RANDOM()");
1447        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1448    }
1449
1450    #[test]
1451    fn test_basic_select() {
1452        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1453        assert!(result.contains("SELECT"));
1454        assert!(result.contains("FROM users"));
1455    }
1456
1457    #[test]
1458    fn test_group_concat_to_string_agg() {
1459        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1460        assert!(
1461            result.contains("STRING_AGG"),
1462            "Expected STRING_AGG, got: {}",
1463            result
1464        );
1465    }
1466
1467    #[test]
1468    fn test_generate_series_to_generate_array() {
1469        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1470        assert!(
1471            result.contains("GENERATE_ARRAY"),
1472            "Expected GENERATE_ARRAY, got: {}",
1473            result
1474        );
1475    }
1476
1477    #[test]
1478    fn test_backtick_identifiers() {
1479        // BigQuery uses backticks for identifiers
1480        let dialect = BigQueryDialect;
1481        let config = dialect.generator_config();
1482        assert_eq!(config.identifier_quote, '`');
1483    }
1484
1485    fn bigquery_identity(sql: &str, expected: &str) {
1486        let dialect = Dialect::get(DialectType::BigQuery);
1487        let ast = dialect.parse(sql).expect("Parse failed");
1488        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1489        let result = dialect.generate(&transformed).expect("Generate failed");
1490        assert_eq!(result, expected, "SQL: {}", sql);
1491    }
1492
1493    #[test]
1494    fn test_cast_char_to_string() {
1495        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1496    }
1497
1498    #[test]
1499    fn test_cast_varchar_to_string() {
1500        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1501    }
1502
1503    #[test]
1504    fn test_cast_nchar_to_string() {
1505        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1506    }
1507
1508    #[test]
1509    fn test_cast_nvarchar_to_string() {
1510        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1511    }
1512
1513    #[test]
1514    fn test_cast_timestamptz_to_timestamp() {
1515        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1516    }
1517
1518    #[test]
1519    fn test_cast_record_to_struct() {
1520        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1521    }
1522
1523    #[test]
1524    fn test_json_literal_to_parse_json() {
1525        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1526        bigquery_identity(
1527            "SELECT JSON '\"foo\"' AS json_data",
1528            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1529        );
1530    }
1531
1532    #[test]
1533    fn test_grant_as_alias_not_quoted() {
1534        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1535        bigquery_identity(
1536            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1537            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1538        );
1539    }
1540
1541    #[test]
1542    fn test_timestamp_literal_to_cast() {
1543        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1544        bigquery_identity(
1545            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1546            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1547        );
1548    }
1549
1550    #[test]
1551    fn test_date_literal_to_cast_in_extract() {
1552        // Issue 1: DATE literal should become CAST syntax in BigQuery
1553        bigquery_identity(
1554            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1555            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1556        );
1557    }
1558
1559    #[test]
1560    fn test_json_object_with_json_literals() {
1561        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1562        bigquery_identity(
1563            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1564            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1565        );
1566    }
1567
1568    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1569    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1570    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1571
1572    #[test]
1573    fn test_safe_parse_date_format_normalization() {
1574        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1575        bigquery_identity(
1576            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1577            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1578        );
1579    }
1580
1581    #[test]
1582    fn test_safe_parse_datetime_format_normalization() {
1583        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1584        bigquery_identity(
1585            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1586            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1587        );
1588    }
1589
1590    #[test]
1591    fn test_safe_parse_timestamp_format_normalization() {
1592        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1593        bigquery_identity(
1594            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1595            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1596        );
1597    }
1598
1599    #[test]
1600    fn test_datetime_literal_to_cast() {
1601        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1602        bigquery_identity(
1603            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1604            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1605        );
1606    }
1607
1608    #[test]
1609    fn test_last_day_week_modifier_stripped() {
1610        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1611        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1612    }
1613
1614    #[test]
1615    fn test_hash_line_comment_parses() {
1616        // Regression test for issue #38:
1617        // BigQuery should accept # as a single-line comment.
1618        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1619        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1620    }
1621}