Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        // BigQuery supports # as single-line comments.
52        config.hash_comments = true;
53        config
54    }
55
56    fn generator_config(&self) -> GeneratorConfig {
57        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58        GeneratorConfig {
59            identifier_quote: '`',
60            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61            dialect: Some(DialectType::BigQuery),
62            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
63            normalize_functions: NormalizeFunctions::None,
64            // BigQuery-specific settings from Python sqlglot
65            interval_allows_plural_form: false,
66            join_hints: false,
67            query_hints: false,
68            table_hints: false,
69            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70            rename_table_with_db: false,
71            nvl2_supported: false,
72            unnest_with_ordinality: false,
73            collate_is_func: true,
74            limit_only_literals: true,
75            supports_table_alias_columns: false,
76            unpivot_aliases_are_identifiers: false,
77            json_key_value_pair_sep: ",",
78            null_ordering_supported: false,
79            ignore_nulls_in_func: true,
80            json_path_single_quote_escape: true,
81            can_implement_array_any: true,
82            supports_to_number: false,
83            named_placeholder_token: "@",
84            hex_func: "TO_HEX",
85            with_properties_prefix: "OPTIONS",
86            supports_exploding_projections: false,
87            except_intersect_support_all_clause: false,
88            supports_unix_seconds: true,
89            // BigQuery uses SAFE_ prefix for safe operations
90            try_supported: true,
91            // BigQuery does not support SEMI/ANTI JOIN syntax
92            semi_anti_join_with_side: false,
93            ..Default::default()
94        }
95    }
96
97    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98        match expr {
99            // ===== Data Type Mappings =====
100            Expression::DataType(dt) => self.transform_data_type(dt),
101
102            // ===== Null handling =====
103            // IFNULL is native to BigQuery - keep as-is for identity
104            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
107            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109            // Coalesce stays as Coalesce
110            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112            // ===== String aggregation =====
113            // GROUP_CONCAT -> STRING_AGG in BigQuery
114            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115                this: f.this,
116                separator: f.separator,
117                order_by: f.order_by,
118                distinct: f.distinct,
119                filter: f.filter,
120                limit: None,
121                inferred_type: None,
122            }))),
123
124            // ===== Cast operations =====
125            // Cast data types are transformed by transform_recursive in mod.rs
126            // which calls transform_data_type via transform_expr(Expression::DataType(...))
127
128            // TryCast -> SafeCast in BigQuery with type transformation
129            Expression::TryCast(c) => {
130                let transformed_type = match self.transform_data_type(c.to)? {
131                    Expression::DataType(dt) => dt,
132                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
133                };
134                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
135                    this: c.this,
136                    to: transformed_type,
137                    trailing_comments: c.trailing_comments,
138                    double_colon_syntax: c.double_colon_syntax,
139                    format: c.format,
140                    default: c.default,
141                    inferred_type: None,
142                })))
143            }
144
145            // ===== Pattern matching =====
146            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
147            Expression::ILike(op) => {
148                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
149                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
150                Ok(Expression::Like(Box::new(LikeOp {
151                    left: lower_left,
152                    right: lower_right,
153                    escape: op.escape,
154                    quantifier: op.quantifier,
155                    inferred_type: None,
156                })))
157            }
158
159            // RegexpLike -> REGEXP_CONTAINS in BigQuery
160            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
161                "REGEXP_CONTAINS".to_string(),
162                vec![f.this, f.pattern],
163            )))),
164
165            // ===== Array operations =====
166            // EXPLODE -> UNNEST in BigQuery
167            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
168                crate::expressions::UnnestFunc {
169                    this: f.this,
170                    expressions: Vec::new(),
171                    with_ordinality: false,
172                    alias: None,
173                    offset_alias: None,
174                },
175            ))),
176
177            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
178            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
179                crate::expressions::UnnestFunc {
180                    this: f.this,
181                    expressions: Vec::new(),
182                    with_ordinality: false,
183                    alias: None,
184                    offset_alias: None,
185                },
186            ))),
187
188            // GenerateSeries -> GENERATE_ARRAY in BigQuery
189            Expression::GenerateSeries(f) => {
190                let mut args = Vec::new();
191                if let Some(start) = f.start {
192                    args.push(*start);
193                }
194                if let Some(end) = f.end {
195                    args.push(*end);
196                }
197                if let Some(step) = f.step {
198                    args.push(*step);
199                }
200                Ok(Expression::Function(Box::new(Function::new(
201                    "GENERATE_ARRAY".to_string(),
202                    args,
203                ))))
204            }
205
206            // ===== Bitwise operations =====
207            // BitwiseAndAgg -> BIT_AND
208            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
209                "BIT_AND".to_string(),
210                vec![f.this],
211            )))),
212
213            // BitwiseOrAgg -> BIT_OR
214            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
215                "BIT_OR".to_string(),
216                vec![f.this],
217            )))),
218
219            // BitwiseXorAgg -> BIT_XOR
220            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
221                "BIT_XOR".to_string(),
222                vec![f.this],
223            )))),
224
225            // BitwiseCount -> BIT_COUNT
226            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
227                "BIT_COUNT".to_string(),
228                vec![f.this],
229            )))),
230
231            // ByteLength -> BYTE_LENGTH
232            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
233                "BYTE_LENGTH".to_string(),
234                vec![f.this],
235            )))),
236
237            // IntDiv -> DIV
238            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
239                "DIV".to_string(),
240                vec![f.this, f.expression],
241            )))),
242
243            // Int64 -> INT64
244            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
245                "INT64".to_string(),
246                vec![f.this],
247            )))),
248
249            // ===== Random =====
250            // RANDOM -> RAND in BigQuery
251            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
252                seed: None,
253                lower: None,
254                upper: None,
255            }))),
256
257            // ===== UUID =====
258            // Uuid -> GENERATE_UUID in BigQuery
259            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
260                "GENERATE_UUID".to_string(),
261                vec![],
262            )))),
263
264            // ===== Approximate functions =====
265            // ApproxDistinct -> APPROX_COUNT_DISTINCT
266            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
267                "APPROX_COUNT_DISTINCT".to_string(),
268                vec![f.this],
269            )))),
270
271            // ArgMax -> MAX_BY in BigQuery
272            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
273                "MAX_BY".to_string(),
274                vec![*f.this, *f.expression],
275            )))),
276
277            // ArgMin -> MIN_BY in BigQuery
278            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
279                "MIN_BY".to_string(),
280                vec![*f.this, *f.expression],
281            )))),
282
283            // ===== Conditional =====
284            // CountIf -> COUNTIF in BigQuery
285            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
286                "COUNTIF".to_string(),
287                vec![f.this],
288            )))),
289
290            // ===== String functions =====
291            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
292            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
293
294            // ===== Conversion =====
295            // Unhex -> FROM_HEX
296            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
297                "FROM_HEX".to_string(),
298                vec![*f.this],
299            )))),
300
301            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
302            Expression::UnixToTime(f) => {
303                let scale = f.scale.unwrap_or(0);
304                match scale {
305                    0 => Ok(Expression::Function(Box::new(Function::new(
306                        "TIMESTAMP_SECONDS".to_string(),
307                        vec![*f.this],
308                    )))),
309                    3 => Ok(Expression::Function(Box::new(Function::new(
310                        "TIMESTAMP_MILLIS".to_string(),
311                        vec![*f.this],
312                    )))),
313                    6 => Ok(Expression::Function(Box::new(Function::new(
314                        "TIMESTAMP_MICROS".to_string(),
315                        vec![*f.this],
316                    )))),
317                    _ => {
318                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
319                        let div_expr =
320                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
321                                *f.this,
322                                Expression::Function(Box::new(Function::new(
323                                    "POWER".to_string(),
324                                    vec![Expression::number(10), Expression::number(scale)],
325                                ))),
326                            )));
327                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
328                            this: div_expr,
329                            to: crate::expressions::DataType::Custom {
330                                name: "INT64".to_string(),
331                            },
332                            double_colon_syntax: false,
333                            trailing_comments: vec![],
334                            format: None,
335                            default: None,
336                            inferred_type: None,
337                        }));
338                        Ok(Expression::Function(Box::new(Function::new(
339                            "TIMESTAMP_SECONDS".to_string(),
340                            vec![cast_expr],
341                        ))))
342                    }
343                }
344            }
345
346            // ===== Date/time =====
347            // DateDiff -> DATE_DIFF in BigQuery
348            Expression::DateDiff(f) => {
349                // BigQuery: DATE_DIFF(date1, date2, part)
350                let unit_str = match f.unit {
351                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
352                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
353                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
354                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
355                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
356                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
357                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
358                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
359                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
360                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
361                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
362                    None => "DAY",
363                };
364                let unit = Expression::Identifier(crate::expressions::Identifier {
365                    name: unit_str.to_string(),
366                    quoted: false,
367                    trailing_comments: Vec::new(),
368                    span: None,
369                });
370                Ok(Expression::Function(Box::new(Function::new(
371                    "DATE_DIFF".to_string(),
372                    vec![f.this, f.expression, unit],
373                ))))
374            }
375
376            // ===== Variance =====
377            // VarPop -> VAR_POP
378            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
379                "VAR_POP".to_string(),
380                vec![f.this],
381            )))),
382
383            // ===== Hash functions =====
384            // SHA -> SHA1
385            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
386                "SHA1".to_string(),
387                vec![f.this],
388            )))),
389
390            // SHA1Digest -> SHA1
391            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
392                "SHA1".to_string(),
393                vec![f.this],
394            )))),
395
396            // MD5Digest -> MD5
397            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
398                "MD5".to_string(),
399                vec![*f.this],
400            )))),
401
402            // ===== Type conversion =====
403            // JSONBool -> BOOL
404            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
405                "BOOL".to_string(),
406                vec![f.this],
407            )))),
408
409            // StringFunc -> STRING
410            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
411                "STRING".to_string(),
412                vec![*f.this],
413            )))),
414
415            // ===== Date/time from parts =====
416            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
417            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
418                "DATE_FROM_UNIX_DATE".to_string(),
419                vec![f.this],
420            )))),
421
422            // UnixDate -> UNIX_DATE
423            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
424                "UNIX_DATE".to_string(),
425                vec![f.this],
426            )))),
427
428            // TimestampDiff -> TIMESTAMP_DIFF
429            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
430                "TIMESTAMP_DIFF".to_string(),
431                vec![*f.this, *f.expression],
432            )))),
433
434            // FromTimeZone -> DATETIME
435            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
436                "DATETIME".to_string(),
437                vec![*f.this],
438            )))),
439
440            // TsOrDsToDatetime -> DATETIME
441            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
442                "DATETIME".to_string(),
443                vec![f.this],
444            )))),
445
446            // TsOrDsToTimestamp -> TIMESTAMP
447            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
448                "TIMESTAMP".to_string(),
449                vec![f.this],
450            )))),
451
452            // ===== IfFunc -> IF in BigQuery =====
453            Expression::IfFunc(f) => {
454                let mut args = vec![f.condition, f.true_value];
455                if let Some(false_val) = f.false_value {
456                    args.push(false_val);
457                } else {
458                    args.push(Expression::Null(crate::expressions::Null));
459                }
460                Ok(Expression::Function(Box::new(Function::new(
461                    "IF".to_string(),
462                    args,
463                ))))
464            }
465
466            // ===== HexString -> FROM_HEX =====
467            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
468                "FROM_HEX".to_string(),
469                vec![*f.this],
470            )))),
471
472            // ===== Additional auto-generated transforms from Python sqlglot =====
473            // ApproxTopK -> APPROX_TOP_COUNT
474            Expression::ApproxTopK(f) => {
475                let mut args = vec![*f.this];
476                if let Some(expr) = f.expression {
477                    args.push(*expr);
478                }
479                Ok(Expression::Function(Box::new(Function::new(
480                    "APPROX_TOP_COUNT".to_string(),
481                    args,
482                ))))
483            }
484
485            // SafeDivide -> SAFE_DIVIDE
486            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
487                "SAFE_DIVIDE".to_string(),
488                vec![*f.this, *f.expression],
489            )))),
490
491            // JSONKeysAtDepth -> JSON_KEYS
492            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
493                "JSON_KEYS".to_string(),
494                vec![*f.this],
495            )))),
496
497            // JSONValueArray -> JSON_VALUE_ARRAY
498            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
499                "JSON_VALUE_ARRAY".to_string(),
500                vec![*f.this],
501            )))),
502
503            // DateFromParts -> DATE
504            Expression::DateFromParts(f) => {
505                let mut args = Vec::new();
506                if let Some(y) = f.year {
507                    args.push(*y);
508                }
509                if let Some(m) = f.month {
510                    args.push(*m);
511                }
512                if let Some(d) = f.day {
513                    args.push(*d);
514                }
515                Ok(Expression::Function(Box::new(Function::new(
516                    "DATE".to_string(),
517                    args,
518                ))))
519            }
520
521            // SPLIT: BigQuery defaults to comma separator when none provided
522            // SPLIT(foo) -> SPLIT(foo, ',')
523            Expression::Split(f) => {
524                // Check if delimiter is empty or a placeholder - add default comma
525                let delimiter = match &f.delimiter {
526                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
527                        Expression::Literal(Literal::String(",".to_string()))
528                    }
529                    _ => f.delimiter,
530                };
531                Ok(Expression::Split(Box::new(SplitFunc {
532                    this: f.this,
533                    delimiter,
534                })))
535            }
536
537            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
538            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
539            Expression::Cast(c) => {
540                use crate::expressions::DataType;
541                // Check if casting to JSON - use PARSE_JSON instead
542                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
543                // (parser creates Custom for type literals like JSON 'string')
544                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
545                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
546                if is_json {
547                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
548                }
549                let transformed_type = match self.transform_data_type(c.to)? {
550                    Expression::DataType(dt) => dt,
551                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
552                };
553                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
554                    this: c.this,
555                    to: transformed_type,
556                    trailing_comments: c.trailing_comments,
557                    double_colon_syntax: c.double_colon_syntax,
558                    format: c.format,
559                    default: c.default,
560                    inferred_type: None,
561                })))
562            }
563
564            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
565            Expression::SafeCast(c) => {
566                let transformed_type = match self.transform_data_type(c.to)? {
567                    Expression::DataType(dt) => dt,
568                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
569                };
570                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
571                    this: c.this,
572                    to: transformed_type,
573                    trailing_comments: c.trailing_comments,
574                    double_colon_syntax: c.double_colon_syntax,
575                    format: c.format,
576                    default: c.default,
577                    inferred_type: None,
578                })))
579            }
580
581            // ===== SELECT-level transforms =====
582            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
583            Expression::Select(mut select) => {
584                if select.group_by.is_some() && select.order_by.is_some() {
585                    // Build map: expression → alias name for aliased projections
586                    let aliases: Vec<(Expression, Identifier)> = select
587                        .expressions
588                        .iter()
589                        .filter_map(|e| {
590                            if let Expression::Alias(a) = e {
591                                Some((a.this.clone(), a.alias.clone()))
592                            } else {
593                                None
594                            }
595                        })
596                        .collect();
597
598                    if let Some(ref mut group_by) = select.group_by {
599                        for grouped in group_by.expressions.iter_mut() {
600                            // Skip numeric indices (already aliased)
601                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
602                                continue;
603                            }
604                            // Check if this GROUP BY expression matches a SELECT alias
605                            for (expr, alias_ident) in &aliases {
606                                if grouped == expr {
607                                    *grouped = Expression::Column(Column {
608                                        name: alias_ident.clone(),
609                                        table: None,
610                                        join_mark: false,
611                                        trailing_comments: Vec::new(),
612                                        span: None,
613                                        inferred_type: None,
614                                    });
615                                    break;
616                                }
617                            }
618                        }
619                    }
620                }
621                Ok(Expression::Select(select))
622            }
623
624            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
625            Expression::ArrayContains(f) => {
626                let array_expr = f.this;
627                let value_expr = f.expression;
628
629                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
630                let unnest = Expression::Unnest(Box::new(UnnestFunc {
631                    this: array_expr,
632                    expressions: Vec::new(),
633                    with_ordinality: false,
634                    alias: None,
635                    offset_alias: None,
636                }));
637                let aliased_unnest = Expression::Alias(Box::new(Alias {
638                    this: unnest,
639                    alias: Identifier::new("_col"),
640                    column_aliases: Vec::new(),
641                    pre_alias_comments: Vec::new(),
642                    trailing_comments: Vec::new(),
643                    inferred_type: None,
644                }));
645                let col_ref = Expression::Column(Column {
646                    name: Identifier::new("_col"),
647                    table: None,
648                    join_mark: false,
649                    trailing_comments: Vec::new(),
650                    span: None,
651                    inferred_type: None,
652                });
653                let where_clause = Where {
654                    this: Expression::Eq(Box::new(BinaryOp {
655                        left: col_ref,
656                        right: value_expr,
657                        left_comments: Vec::new(),
658                        operator_comments: Vec::new(),
659                        trailing_comments: Vec::new(),
660                        inferred_type: None,
661                    })),
662                };
663                let inner_select = Expression::Select(Box::new(Select {
664                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
665                    from: Some(From {
666                        expressions: vec![aliased_unnest],
667                    }),
668                    where_clause: Some(where_clause),
669                    ..Default::default()
670                }));
671                Ok(Expression::Exists(Box::new(Exists {
672                    this: inner_select,
673                    not: false,
674                })))
675            }
676
677            // ===== JSON_OBJECT array form → key-value pairs =====
678            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
679            Expression::JsonObject(mut f) => {
680                if f.pairs.len() == 1 {
681                    // Extract expressions from both Array and ArrayFunc variants
682                    let keys_exprs = match &f.pairs[0].0 {
683                        Expression::Array(arr) => Some(&arr.expressions),
684                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
685                        _ => None,
686                    };
687                    let vals_exprs = match &f.pairs[0].1 {
688                        Expression::Array(arr) => Some(&arr.expressions),
689                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
690                        _ => None,
691                    };
692                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
693                        if keys.len() == vals.len() {
694                            let new_pairs: Vec<(Expression, Expression)> = keys
695                                .iter()
696                                .zip(vals.iter())
697                                .map(|(k, v)| (k.clone(), v.clone()))
698                                .collect();
699                            f.pairs = new_pairs;
700                        }
701                    }
702                }
703                Ok(Expression::JsonObject(f))
704            }
705
706            // ===== MOD function: unwrap unnecessary Paren from first argument =====
707            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
708            Expression::ModFunc(mut f) => {
709                // Unwrap Paren from first argument if present
710                if let Expression::Paren(paren) = f.this {
711                    f.this = paren.this;
712                }
713                Ok(Expression::ModFunc(f))
714            }
715
716            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
717            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
718                let path = match *e.expression {
719                    Expression::Literal(Literal::String(s)) => {
720                        let normalized = if s.starts_with('$') {
721                            s
722                        } else if s.starts_with('[') {
723                            format!("${}", s)
724                        } else {
725                            format!("$.{}", s)
726                        };
727                        Expression::Literal(Literal::String(normalized))
728                    }
729                    other => other,
730                };
731                Ok(Expression::Function(Box::new(Function::new(
732                    "JSON_EXTRACT".to_string(),
733                    vec![*e.this, path],
734                ))))
735            }
736
737            // Generic function transformations
738            Expression::Function(f) => self.transform_function(*f),
739
740            // Generic aggregate function transformations
741            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
742
743            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
744            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
745            Expression::MethodCall(mc) => self.transform_method_call(*mc),
746
747            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
748            // and set is_table_function = true for TABLE FUNCTION syntax
749            Expression::CreateFunction(mut cf) => {
750                if let Some(ref mut rtb) = cf.returns_table_body {
751                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
752                        // Convert TABLE (...) to TABLE <...> with BigQuery types
753                        let inner = if rtb.starts_with("TABLE (") {
754                            &rtb["TABLE (".len()..rtb.len() - 1]
755                        } else {
756                            &rtb["TABLE(".len()..rtb.len() - 1]
757                        };
758                        // Convert common types to BigQuery equivalents
759                        let converted = inner
760                            .replace(" INT,", " INT64,")
761                            .replace(" INT)", " INT64)")
762                            .replace(" INTEGER,", " INT64,")
763                            .replace(" INTEGER)", " INT64)")
764                            .replace(" FLOAT,", " FLOAT64,")
765                            .replace(" FLOAT)", " FLOAT64)")
766                            .replace(" BOOLEAN,", " BOOL,")
767                            .replace(" BOOLEAN)", " BOOL)")
768                            .replace(" VARCHAR", " STRING")
769                            .replace(" TEXT", " STRING");
770                        // Handle trailing type (no comma, no paren)
771                        let converted = if converted.ends_with(" INT") {
772                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
773                        } else {
774                            converted
775                        };
776                        *rtb = format!("TABLE <{}>", converted);
777                        cf.is_table_function = true;
778                    }
779                }
780                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
781                if cf.is_table_function {
782                    if let Some(ref body) = cf.body {
783                        if matches!(body, FunctionBody::StringLiteral(_)) {
784                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
785                                // Parse the SQL string into an expression
786                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
787                                    if let Some(stmt) = parsed.into_iter().next() {
788                                        cf.body = Some(FunctionBody::Expression(stmt));
789                                    } else {
790                                        cf.body = Some(FunctionBody::StringLiteral(sql));
791                                    }
792                                } else {
793                                    cf.body = Some(FunctionBody::StringLiteral(sql));
794                                }
795                            }
796                        }
797                    }
798                }
799                Ok(Expression::CreateFunction(cf))
800            }
801
802            // Pass through everything else
803            _ => Ok(expr),
804        }
805    }
806}
807
808impl BigQueryDialect {
809    /// Transform data types according to BigQuery TYPE_MAPPING
810    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
811        use crate::expressions::DataType;
812        let transformed = match dt {
813            // BIGINT -> INT64
814            DataType::BigInt { .. } => DataType::Custom {
815                name: "INT64".to_string(),
816            },
817            // INT -> INT64
818            DataType::Int { .. } => DataType::Custom {
819                name: "INT64".to_string(),
820            },
821            // SMALLINT -> INT64
822            DataType::SmallInt { .. } => DataType::Custom {
823                name: "INT64".to_string(),
824            },
825            // TINYINT -> INT64
826            DataType::TinyInt { .. } => DataType::Custom {
827                name: "INT64".to_string(),
828            },
829            // FLOAT -> FLOAT64
830            DataType::Float { .. } => DataType::Custom {
831                name: "FLOAT64".to_string(),
832            },
833            // DOUBLE -> FLOAT64
834            DataType::Double { .. } => DataType::Custom {
835                name: "FLOAT64".to_string(),
836            },
837            // BOOLEAN -> BOOL
838            DataType::Boolean => DataType::Custom {
839                name: "BOOL".to_string(),
840            },
841            // CHAR -> STRING
842            DataType::Char { .. } => DataType::Custom {
843                name: "STRING".to_string(),
844            },
845            // VARCHAR -> STRING
846            DataType::VarChar { .. } => DataType::Custom {
847                name: "STRING".to_string(),
848            },
849            // TEXT -> STRING
850            DataType::Text => DataType::Custom {
851                name: "STRING".to_string(),
852            },
853            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
854            DataType::String { .. } => DataType::Custom {
855                name: "STRING".to_string(),
856            },
857            // BINARY -> BYTES
858            DataType::Binary { .. } => DataType::Custom {
859                name: "BYTES".to_string(),
860            },
861            // VARBINARY -> BYTES
862            DataType::VarBinary { .. } => DataType::Custom {
863                name: "BYTES".to_string(),
864            },
865            // BLOB -> BYTES
866            DataType::Blob => DataType::Custom {
867                name: "BYTES".to_string(),
868            },
869            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
870            DataType::Decimal { .. } => DataType::Custom {
871                name: "NUMERIC".to_string(),
872            },
873            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
874            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
875            // be converted to DATETIME by the generator
876            DataType::Timestamp {
877                timezone: false, ..
878            } => DataType::Custom {
879                name: "TIMESTAMP".to_string(),
880            },
881            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
882                name: "TIMESTAMP".to_string(),
883            },
884            // UUID -> STRING (BigQuery doesn't have native UUID type)
885            DataType::Uuid => DataType::Custom {
886                name: "STRING".to_string(),
887            },
888            // RECORD -> STRUCT in BigQuery
889            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
890                DataType::Custom {
891                    name: "STRUCT".to_string(),
892                }
893            }
894            // TIMESTAMPTZ (custom) -> TIMESTAMP
895            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
896                DataType::Custom {
897                    name: "TIMESTAMP".to_string(),
898                }
899            }
900            // BYTEINT (custom) -> INT64
901            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
902                DataType::Custom {
903                    name: "INT64".to_string(),
904                }
905            }
906            // Keep all other types as-is
907            other => other,
908        };
909        Ok(Expression::DataType(transformed))
910    }
911
912    fn transform_function(&self, f: Function) -> Result<Expression> {
913        let name_upper = f.name.to_uppercase();
914        match name_upper.as_str() {
915            // IFNULL -> COALESCE (both work in BigQuery)
916            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
917                original_name: None,
918                expressions: f.args,
919                inferred_type: None,
920            }))),
921
922            // NVL -> COALESCE
923            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
924                original_name: None,
925                expressions: f.args,
926                inferred_type: None,
927            }))),
928
929            // ISNULL -> COALESCE
930            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
931                original_name: None,
932                expressions: f.args,
933                inferred_type: None,
934            }))),
935
936            // GROUP_CONCAT -> STRING_AGG in BigQuery
937            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
938                Function::new("STRING_AGG".to_string(), f.args),
939            ))),
940
941            // SUBSTR -> SUBSTRING (both work)
942            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
943                "SUBSTRING".to_string(),
944                f.args,
945            )))),
946
947            // RANDOM -> RAND
948            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
949                seed: None,
950                lower: None,
951                upper: None,
952            }))),
953
954            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
955            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
956            "CURRENT_DATE" if f.args.is_empty() => {
957                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
958            }
959            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
960                name: "CURRENT_DATE".to_string(),
961                args: f.args,
962                distinct: false,
963                trailing_comments: Vec::new(),
964                use_bracket_syntax: false,
965                no_parens: false,
966                quoted: false,
967                span: None,
968                inferred_type: None,
969            }))),
970
971            // NOW -> CURRENT_TIMESTAMP in BigQuery
972            "NOW" => Ok(Expression::CurrentTimestamp(
973                crate::expressions::CurrentTimestamp {
974                    precision: None,
975                    sysdate: false,
976                },
977            )),
978
979            // TO_DATE -> PARSE_DATE in BigQuery
980            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
981                "PARSE_DATE".to_string(),
982                f.args,
983            )))),
984
985            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
986            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
987                "PARSE_TIMESTAMP".to_string(),
988                f.args,
989            )))),
990
991            // TO_TIME -> TIME in BigQuery
992            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
993                "TIME".to_string(),
994                f.args,
995            )))),
996
997            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
998            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
999                "FORMAT_DATE".to_string(),
1000                f.args,
1001            )))),
1002
1003            // POSITION -> STRPOS in BigQuery
1004            // BigQuery uses STRPOS(string, substring)
1005            "POSITION" if f.args.len() == 2 => {
1006                let mut args = f.args;
1007                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1008                let first = args.remove(0);
1009                let second = args.remove(0);
1010                Ok(Expression::Function(Box::new(Function::new(
1011                    "STRPOS".to_string(),
1012                    vec![second, first],
1013                ))))
1014            }
1015
1016            // LEN -> LENGTH
1017            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1018                f.args.into_iter().next().unwrap(),
1019            )))),
1020
1021            // CEILING -> CEIL (both work)
1022            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1023                this: f.args.into_iter().next().unwrap(),
1024                decimals: None,
1025                to: None,
1026            }))),
1027
1028            // GETDATE -> CURRENT_TIMESTAMP
1029            "GETDATE" => Ok(Expression::CurrentTimestamp(
1030                crate::expressions::CurrentTimestamp {
1031                    precision: None,
1032                    sysdate: false,
1033                },
1034            )),
1035
1036            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1037            // CARDINALITY -> ARRAY_LENGTH
1038            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1039                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1040            ))),
1041
1042            // UNNEST is native to BigQuery
1043
1044            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1045            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1046                "GENERATE_ARRAY".to_string(),
1047                f.args,
1048            )))),
1049
1050            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1051            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1052            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1053                "APPROX_COUNT_DISTINCT".to_string(),
1054                f.args,
1055            )))),
1056
1057            // COUNT_IF -> COUNTIF in BigQuery
1058            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1059                "COUNTIF".to_string(),
1060                f.args,
1061            )))),
1062
1063            // SHA1 -> SHA1 (native), SHA -> SHA1
1064            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1065                "SHA1".to_string(),
1066                f.args,
1067            )))),
1068
1069            // SHA256/SHA2 -> SHA256
1070            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1071                "SHA256".to_string(),
1072                f.args,
1073            )))),
1074
1075            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1076            // TO_HEX(MD5(x)) pattern
1077            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1078                "MD5".to_string(),
1079                f.args,
1080            )))),
1081
1082            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1083            // VAR_POP -> VAR_POP (native)
1084
1085            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1086            "DATEADD" if f.args.len() == 3 => {
1087                let mut args = f.args;
1088                let unit_expr = args.remove(0);
1089                let amount = args.remove(0);
1090                let date = args.remove(0);
1091                // Convert unit identifier to IntervalUnit
1092                let unit_name = match &unit_expr {
1093                    Expression::Identifier(id) => id.name.to_uppercase(),
1094                    _ => "DAY".to_string(),
1095                };
1096                let unit = match unit_name.as_str() {
1097                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1098                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1099                        crate::expressions::IntervalUnit::Quarter
1100                    }
1101                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1102                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1103                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1104                        crate::expressions::IntervalUnit::Day
1105                    }
1106                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1107                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1108                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1109                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1110                        crate::expressions::IntervalUnit::Millisecond
1111                    }
1112                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1113                        crate::expressions::IntervalUnit::Microsecond
1114                    }
1115                    _ => crate::expressions::IntervalUnit::Day,
1116                };
1117                Ok(Expression::DateAdd(Box::new(
1118                    crate::expressions::DateAddFunc {
1119                        this: date,
1120                        interval: amount,
1121                        unit,
1122                    },
1123                )))
1124            }
1125            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1126                "DATE_ADD".to_string(),
1127                f.args,
1128            )))),
1129
1130            // DATE_DIFF in BigQuery (native)
1131            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1132                "DATE_DIFF".to_string(),
1133                f.args,
1134            )))),
1135
1136            // TIMESTAMP_DIFF in BigQuery
1137            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1138                "TIMESTAMP_DIFF".to_string(),
1139                f.args,
1140            )))),
1141
1142            // TIME -> TIME (native)
1143            // DATETIME -> DATETIME (native)
1144
1145            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1146
1147            // NEWID/UUID -> GENERATE_UUID
1148            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1149                "GENERATE_UUID".to_string(),
1150                vec![],
1151            )))),
1152
1153            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1154            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1155                "EDIT_DISTANCE".to_string(),
1156                f.args,
1157            )))),
1158
1159            // UNIX_TIMESTAMP -> UNIX_SECONDS
1160            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1161                "UNIX_SECONDS".to_string(),
1162                f.args,
1163            )))),
1164
1165            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1166            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1167                "TIMESTAMP_SECONDS".to_string(),
1168                f.args,
1169            )))),
1170
1171            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1172            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1173                Function::new("LENGTH".to_string(), f.args),
1174            ))),
1175
1176            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1177            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1178                "BYTE_LENGTH".to_string(),
1179                f.args,
1180            )))),
1181
1182            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1183            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1184                "JSON_VALUE_ARRAY".to_string(),
1185                f.args,
1186            )))),
1187
1188            // INSTR is native to BigQuery
1189
1190            // SPLIT: BigQuery defaults to comma separator when none provided
1191            // SPLIT(foo) -> SPLIT(foo, ',')
1192            "SPLIT" if f.args.len() == 1 => {
1193                let mut args = f.args;
1194                args.push(Expression::Literal(Literal::String(",".to_string())));
1195                Ok(Expression::Split(Box::new(SplitFunc {
1196                    this: args.remove(0),
1197                    delimiter: args.remove(0),
1198                })))
1199            }
1200
1201            // SPLIT with two args - convert to Split expression
1202            "SPLIT" if f.args.len() == 2 => {
1203                let mut args = f.args;
1204                Ok(Expression::Split(Box::new(SplitFunc {
1205                    this: args.remove(0),
1206                    delimiter: args.remove(0),
1207                })))
1208            }
1209
1210            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1211            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1212                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1213                let args = if f.args.len() > 4 {
1214                    f.args[..4].to_vec()
1215                } else {
1216                    f.args
1217                };
1218                Ok(Expression::Function(Box::new(Function::new(
1219                    "REGEXP_EXTRACT".to_string(),
1220                    args,
1221                ))))
1222            }
1223            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1224                "REGEXP_EXTRACT".to_string(),
1225                f.args,
1226            )))),
1227
1228            // REGEXP_REPLACE - strip extra Snowflake-specific args
1229            "REGEXP_REPLACE" if f.args.len() > 3 => {
1230                let args = f.args[..3].to_vec();
1231                Ok(Expression::Function(Box::new(Function::new(
1232                    "REGEXP_REPLACE".to_string(),
1233                    args,
1234                ))))
1235            }
1236
1237            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1238            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1239                "JSON_OBJECT".to_string(),
1240                f.args,
1241            )))),
1242
1243            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1244            "EDITDISTANCE" if f.args.len() == 3 => {
1245                let col1 = f.args[0].clone();
1246                let col2 = f.args[1].clone();
1247                let max_dist = f.args[2].clone();
1248                Ok(Expression::Function(Box::new(Function::new(
1249                    "EDIT_DISTANCE".to_string(),
1250                    vec![
1251                        col1,
1252                        col2,
1253                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1254                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1255                            value: max_dist,
1256                            separator: crate::expressions::NamedArgSeparator::DArrow,
1257                        })),
1258                    ],
1259                ))))
1260            }
1261            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1262                Function::new("EDIT_DISTANCE".to_string(), f.args),
1263            ))),
1264
1265            // HEX_DECODE_BINARY -> FROM_HEX
1266            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1267                "FROM_HEX".to_string(),
1268                f.args,
1269            )))),
1270
1271            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1272            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1273            "PARSE_DATE"
1274            | "PARSE_DATETIME"
1275            | "PARSE_TIMESTAMP"
1276            | "SAFE.PARSE_DATE"
1277            | "SAFE.PARSE_DATETIME"
1278            | "SAFE.PARSE_TIMESTAMP" => {
1279                let args = self.normalize_time_format_args(f.args);
1280                Ok(Expression::Function(Box::new(Function {
1281                    name: f.name,
1282                    args,
1283                    distinct: f.distinct,
1284                    no_parens: f.no_parens,
1285                    trailing_comments: f.trailing_comments,
1286                    quoted: f.quoted,
1287                    use_bracket_syntax: f.use_bracket_syntax,
1288                    span: None,
1289                    inferred_type: None,
1290                })))
1291            }
1292
1293            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1294            "GET_PATH" if f.args.len() == 2 => {
1295                let mut args = f.args;
1296                let this = args.remove(0);
1297                let path = args.remove(0);
1298                let json_path = match &path {
1299                    Expression::Literal(Literal::String(s)) => {
1300                        let normalized = if s.starts_with('$') {
1301                            s.clone()
1302                        } else if s.starts_with('[') {
1303                            format!("${}", s)
1304                        } else {
1305                            format!("$.{}", s)
1306                        };
1307                        Expression::Literal(Literal::String(normalized))
1308                    }
1309                    _ => path,
1310                };
1311                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1312                    this,
1313                    path: json_path,
1314                    returning: None,
1315                    arrow_syntax: false,
1316                    hash_arrow_syntax: false,
1317                    wrapper_option: None,
1318                    quotes_option: None,
1319                    on_scalar_string: false,
1320                    on_error: None,
1321                })))
1322            }
1323
1324            // Pass through everything else
1325            _ => Ok(Expression::Function(Box::new(f))),
1326        }
1327    }
1328
1329    fn transform_aggregate_function(
1330        &self,
1331        f: Box<crate::expressions::AggregateFunction>,
1332    ) -> Result<Expression> {
1333        let name_upper = f.name.to_uppercase();
1334        match name_upper.as_str() {
1335            // GROUP_CONCAT -> STRING_AGG
1336            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1337                Function::new("STRING_AGG".to_string(), f.args),
1338            ))),
1339
1340            // Pass through everything else
1341            _ => Ok(Expression::AggregateFunction(f)),
1342        }
1343    }
1344
1345    /// Transform MethodCall expressions
1346    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1347    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1348        use crate::expressions::MethodCall;
1349
1350        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1351        if let Expression::Column(ref col) = mc.this {
1352            if col.name.name.eq_ignore_ascii_case("SAFE") {
1353                let method_upper = mc.method.name.to_uppercase();
1354                if method_upper == "PARSE_DATE"
1355                    || method_upper == "PARSE_DATETIME"
1356                    || method_upper == "PARSE_TIMESTAMP"
1357                {
1358                    // Normalize the format string in the first argument
1359                    let args = self.normalize_time_format_args(mc.args);
1360                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1361                        this: mc.this,
1362                        method: mc.method,
1363                        args,
1364                    })));
1365                }
1366            }
1367        }
1368
1369        // Pass through all other method calls
1370        Ok(Expression::MethodCall(Box::new(mc)))
1371    }
1372
1373    /// Normalize time format strings in function arguments
1374    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1375    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1376        args.into_iter()
1377            .enumerate()
1378            .map(|(i, arg)| {
1379                // Only transform the first argument (the format string)
1380                if i == 0 {
1381                    if let Expression::Literal(Literal::String(s)) = arg {
1382                        let normalized = self.normalize_time_format(&s);
1383                        return Expression::Literal(Literal::String(normalized));
1384                    }
1385                }
1386                arg
1387            })
1388            .collect()
1389    }
1390
1391    /// Normalize a time format string according to BigQuery conventions
1392    /// %Y-%m-%d -> %F (ISO date)
1393    /// %H:%M:%S -> %T (time)
1394    fn normalize_time_format(&self, format: &str) -> String {
1395        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1396    }
1397}
1398
1399#[cfg(test)]
1400mod tests {
1401    use super::*;
1402    use crate::dialects::Dialect;
1403    use crate::parse_one;
1404
1405    fn transpile_to_bigquery(sql: &str) -> String {
1406        let dialect = Dialect::get(DialectType::Generic);
1407        let result = dialect
1408            .transpile_to(sql, DialectType::BigQuery)
1409            .expect("Transpile failed");
1410        result[0].clone()
1411    }
1412
1413    #[test]
1414    fn test_ifnull_identity() {
1415        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1416        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1417        assert!(
1418            result.contains("COALESCE"),
1419            "Expected COALESCE, got: {}",
1420            result
1421        );
1422    }
1423
1424    #[test]
1425    fn test_nvl_to_ifnull() {
1426        // NVL is converted to IFNULL in BigQuery
1427        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1428        assert!(
1429            result.contains("IFNULL"),
1430            "Expected IFNULL, got: {}",
1431            result
1432        );
1433    }
1434
1435    #[test]
1436    fn test_try_cast_to_safe_cast() {
1437        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1438        assert!(
1439            result.contains("SAFE_CAST"),
1440            "Expected SAFE_CAST, got: {}",
1441            result
1442        );
1443    }
1444
1445    #[test]
1446    fn test_random_to_rand() {
1447        let result = transpile_to_bigquery("SELECT RANDOM()");
1448        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1449    }
1450
1451    #[test]
1452    fn test_basic_select() {
1453        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1454        assert!(result.contains("SELECT"));
1455        assert!(result.contains("FROM users"));
1456    }
1457
1458    #[test]
1459    fn test_group_concat_to_string_agg() {
1460        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1461        assert!(
1462            result.contains("STRING_AGG"),
1463            "Expected STRING_AGG, got: {}",
1464            result
1465        );
1466    }
1467
1468    #[test]
1469    fn test_generate_series_to_generate_array() {
1470        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1471        assert!(
1472            result.contains("GENERATE_ARRAY"),
1473            "Expected GENERATE_ARRAY, got: {}",
1474            result
1475        );
1476    }
1477
1478    #[test]
1479    fn test_backtick_identifiers() {
1480        // BigQuery uses backticks for identifiers
1481        let dialect = BigQueryDialect;
1482        let config = dialect.generator_config();
1483        assert_eq!(config.identifier_quote, '`');
1484    }
1485
1486    fn bigquery_identity(sql: &str, expected: &str) {
1487        let dialect = Dialect::get(DialectType::BigQuery);
1488        let ast = dialect.parse(sql).expect("Parse failed");
1489        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1490        let result = dialect.generate(&transformed).expect("Generate failed");
1491        assert_eq!(result, expected, "SQL: {}", sql);
1492    }
1493
1494    #[test]
1495    fn test_cast_char_to_string() {
1496        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1497    }
1498
1499    #[test]
1500    fn test_cast_varchar_to_string() {
1501        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1502    }
1503
1504    #[test]
1505    fn test_cast_nchar_to_string() {
1506        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1507    }
1508
1509    #[test]
1510    fn test_cast_nvarchar_to_string() {
1511        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1512    }
1513
1514    #[test]
1515    fn test_cast_timestamptz_to_timestamp() {
1516        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1517    }
1518
1519    #[test]
1520    fn test_cast_record_to_struct() {
1521        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1522    }
1523
1524    #[test]
1525    fn test_json_literal_to_parse_json() {
1526        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1527        bigquery_identity(
1528            "SELECT JSON '\"foo\"' AS json_data",
1529            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1530        );
1531    }
1532
1533    #[test]
1534    fn test_grant_as_alias_not_quoted() {
1535        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1536        bigquery_identity(
1537            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1538            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1539        );
1540    }
1541
1542    #[test]
1543    fn test_timestamp_literal_to_cast() {
1544        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1545        bigquery_identity(
1546            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1547            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1548        );
1549    }
1550
1551    #[test]
1552    fn test_date_literal_to_cast_in_extract() {
1553        // Issue 1: DATE literal should become CAST syntax in BigQuery
1554        bigquery_identity(
1555            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1556            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1557        );
1558    }
1559
1560    #[test]
1561    fn test_json_object_with_json_literals() {
1562        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1563        bigquery_identity(
1564            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1565            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1566        );
1567    }
1568
1569    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1570    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1571    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1572
1573    #[test]
1574    fn test_safe_parse_date_format_normalization() {
1575        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1576        bigquery_identity(
1577            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1578            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1579        );
1580    }
1581
1582    #[test]
1583    fn test_safe_parse_datetime_format_normalization() {
1584        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1585        bigquery_identity(
1586            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1587            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1588        );
1589    }
1590
1591    #[test]
1592    fn test_safe_parse_timestamp_format_normalization() {
1593        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1594        bigquery_identity(
1595            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1596            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1597        );
1598    }
1599
1600    #[test]
1601    fn test_datetime_literal_to_cast() {
1602        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1603        bigquery_identity(
1604            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1605            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1606        );
1607    }
1608
1609    #[test]
1610    fn test_last_day_week_modifier_stripped() {
1611        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1612        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1613    }
1614
1615    #[test]
1616    fn test_hash_line_comment_parses() {
1617        // Regression test for issue #38:
1618        // BigQuery should accept # as a single-line comment.
1619        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1620        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1621    }
1622}