Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        // BigQuery supports # as single-line comments.
52        config.hash_comments = true;
53        config
54    }
55
56    fn generator_config(&self) -> GeneratorConfig {
57        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58        GeneratorConfig {
59            identifier_quote: '`',
60            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61            dialect: Some(DialectType::BigQuery),
62            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
63            normalize_functions: NormalizeFunctions::None,
64            // BigQuery-specific settings from Python sqlglot
65            interval_allows_plural_form: false,
66            join_hints: false,
67            query_hints: false,
68            table_hints: false,
69            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70            rename_table_with_db: false,
71            nvl2_supported: false,
72            unnest_with_ordinality: false,
73            collate_is_func: true,
74            limit_only_literals: true,
75            supports_table_alias_columns: false,
76            unpivot_aliases_are_identifiers: false,
77            json_key_value_pair_sep: ",",
78            null_ordering_supported: false,
79            ignore_nulls_in_func: true,
80            json_path_single_quote_escape: true,
81            can_implement_array_any: true,
82            supports_to_number: false,
83            named_placeholder_token: "@",
84            hex_func: "TO_HEX",
85            with_properties_prefix: "OPTIONS",
86            supports_exploding_projections: false,
87            except_intersect_support_all_clause: false,
88            supports_unix_seconds: true,
89            // BigQuery uses SAFE_ prefix for safe operations
90            try_supported: true,
91            // BigQuery does not support SEMI/ANTI JOIN syntax
92            semi_anti_join_with_side: false,
93            ..Default::default()
94        }
95    }
96
97    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98        match expr {
99            // ===== Data Type Mappings =====
100            Expression::DataType(dt) => self.transform_data_type(dt),
101
102            // ===== Null handling =====
103            // IFNULL is native to BigQuery - keep as-is for identity
104            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
107            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109            // Coalesce stays as Coalesce
110            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112            // ===== String aggregation =====
113            // GROUP_CONCAT -> STRING_AGG in BigQuery
114            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115                this: f.this,
116                separator: f.separator,
117                order_by: f.order_by,
118                distinct: f.distinct,
119                filter: f.filter,
120                limit: None,
121                inferred_type: None,
122            }))),
123
124            // ===== Cast operations =====
125            // Cast data types are transformed by transform_recursive in mod.rs
126            // which calls transform_data_type via transform_expr(Expression::DataType(...))
127
128            // TryCast -> SafeCast in BigQuery with type transformation
129            Expression::TryCast(c) => {
130                let transformed_type = match self.transform_data_type(c.to)? {
131                    Expression::DataType(dt) => dt,
132                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
133                };
134                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
135                    this: c.this,
136                    to: transformed_type,
137                    trailing_comments: c.trailing_comments,
138                    double_colon_syntax: c.double_colon_syntax,
139                    format: c.format,
140                    default: c.default,
141                    inferred_type: None,
142                })))
143            }
144
145            // ===== Pattern matching =====
146            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
147            Expression::ILike(op) => {
148                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
149                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
150                Ok(Expression::Like(Box::new(LikeOp {
151                    left: lower_left,
152                    right: lower_right,
153                    escape: op.escape,
154                    quantifier: op.quantifier,
155                    inferred_type: None,
156                })))
157            }
158
159            // RegexpLike -> REGEXP_CONTAINS in BigQuery
160            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
161                "REGEXP_CONTAINS".to_string(),
162                vec![f.this, f.pattern],
163            )))),
164
165            // ===== Array operations =====
166            // EXPLODE -> UNNEST in BigQuery
167            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
168                crate::expressions::UnnestFunc {
169                    this: f.this,
170                    expressions: Vec::new(),
171                    with_ordinality: false,
172                    alias: None,
173                    offset_alias: None,
174                },
175            ))),
176
177            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
178            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
179                crate::expressions::UnnestFunc {
180                    this: f.this,
181                    expressions: Vec::new(),
182                    with_ordinality: false,
183                    alias: None,
184                    offset_alias: None,
185                },
186            ))),
187
188            // GenerateSeries -> GENERATE_ARRAY in BigQuery
189            Expression::GenerateSeries(f) => {
190                let mut args = Vec::new();
191                if let Some(start) = f.start {
192                    args.push(*start);
193                }
194                if let Some(end) = f.end {
195                    args.push(*end);
196                }
197                if let Some(step) = f.step {
198                    args.push(*step);
199                }
200                Ok(Expression::Function(Box::new(Function::new(
201                    "GENERATE_ARRAY".to_string(),
202                    args,
203                ))))
204            }
205
206            // ===== Bitwise operations =====
207            // BitwiseAndAgg -> BIT_AND
208            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
209                "BIT_AND".to_string(),
210                vec![f.this],
211            )))),
212
213            // BitwiseOrAgg -> BIT_OR
214            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
215                "BIT_OR".to_string(),
216                vec![f.this],
217            )))),
218
219            // BitwiseXorAgg -> BIT_XOR
220            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
221                "BIT_XOR".to_string(),
222                vec![f.this],
223            )))),
224
225            // BitwiseCount -> BIT_COUNT
226            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
227                "BIT_COUNT".to_string(),
228                vec![f.this],
229            )))),
230
231            // ByteLength -> BYTE_LENGTH
232            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
233                "BYTE_LENGTH".to_string(),
234                vec![f.this],
235            )))),
236
237            // IntDiv -> DIV
238            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
239                "DIV".to_string(),
240                vec![f.this, f.expression],
241            )))),
242
243            // Int64 -> INT64
244            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
245                "INT64".to_string(),
246                vec![f.this],
247            )))),
248
249            // ===== Random =====
250            // RANDOM -> RAND in BigQuery
251            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
252                seed: None,
253                lower: None,
254                upper: None,
255            }))),
256
257            // ===== UUID =====
258            // Uuid -> GENERATE_UUID in BigQuery
259            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
260                "GENERATE_UUID".to_string(),
261                vec![],
262            )))),
263
264            // ===== Approximate functions =====
265            // ApproxDistinct -> APPROX_COUNT_DISTINCT
266            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
267                "APPROX_COUNT_DISTINCT".to_string(),
268                vec![f.this],
269            )))),
270
271            // ArgMax -> MAX_BY in BigQuery
272            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
273                "MAX_BY".to_string(),
274                vec![*f.this, *f.expression],
275            )))),
276
277            // ArgMin -> MIN_BY in BigQuery
278            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
279                "MIN_BY".to_string(),
280                vec![*f.this, *f.expression],
281            )))),
282
283            // ===== Conditional =====
284            // CountIf -> COUNTIF in BigQuery
285            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
286                "COUNTIF".to_string(),
287                vec![f.this],
288            )))),
289
290            // ===== String functions =====
291            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
292            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
293
294            // ===== Conversion =====
295            // Unhex -> FROM_HEX
296            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
297                "FROM_HEX".to_string(),
298                vec![*f.this],
299            )))),
300
301            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
302            Expression::UnixToTime(f) => {
303                let scale = f.scale.unwrap_or(0);
304                match scale {
305                    0 => Ok(Expression::Function(Box::new(Function::new(
306                        "TIMESTAMP_SECONDS".to_string(),
307                        vec![*f.this],
308                    )))),
309                    3 => Ok(Expression::Function(Box::new(Function::new(
310                        "TIMESTAMP_MILLIS".to_string(),
311                        vec![*f.this],
312                    )))),
313                    6 => Ok(Expression::Function(Box::new(Function::new(
314                        "TIMESTAMP_MICROS".to_string(),
315                        vec![*f.this],
316                    )))),
317                    _ => {
318                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
319                        let div_expr =
320                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
321                                *f.this,
322                                Expression::Function(Box::new(Function::new(
323                                    "POWER".to_string(),
324                                    vec![Expression::number(10), Expression::number(scale)],
325                                ))),
326                            )));
327                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
328                            this: div_expr,
329                            to: crate::expressions::DataType::Custom {
330                                name: "INT64".to_string(),
331                            },
332                            double_colon_syntax: false,
333                            trailing_comments: vec![],
334                            format: None,
335                            default: None,
336                            inferred_type: None,
337                        }));
338                        Ok(Expression::Function(Box::new(Function::new(
339                            "TIMESTAMP_SECONDS".to_string(),
340                            vec![cast_expr],
341                        ))))
342                    }
343                }
344            }
345
346            // ===== Date/time =====
347            // DateDiff -> DATE_DIFF in BigQuery
348            Expression::DateDiff(f) => {
349                // BigQuery: DATE_DIFF(date1, date2, part)
350                let unit_str = match f.unit {
351                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
352                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
353                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
354                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
355                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
356                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
357                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
358                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
359                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
360                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
361                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
362                    None => "DAY",
363                };
364                let unit = Expression::Identifier(crate::expressions::Identifier {
365                    name: unit_str.to_string(),
366                    quoted: false,
367                    trailing_comments: Vec::new(),
368                    span: None,
369                });
370                Ok(Expression::Function(Box::new(Function::new(
371                    "DATE_DIFF".to_string(),
372                    vec![f.this, f.expression, unit],
373                ))))
374            }
375
376            // ===== Variance =====
377            // VarPop -> VAR_POP
378            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
379                "VAR_POP".to_string(),
380                vec![f.this],
381            )))),
382
383            // ===== Hash functions =====
384            // SHA -> SHA1
385            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
386                "SHA1".to_string(),
387                vec![f.this],
388            )))),
389
390            // SHA1Digest -> SHA1
391            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
392                "SHA1".to_string(),
393                vec![f.this],
394            )))),
395
396            // MD5Digest -> MD5
397            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
398                "MD5".to_string(),
399                vec![*f.this],
400            )))),
401
402            // ===== Type conversion =====
403            // JSONBool -> BOOL
404            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
405                "BOOL".to_string(),
406                vec![f.this],
407            )))),
408
409            // StringFunc -> STRING
410            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
411                "STRING".to_string(),
412                vec![*f.this],
413            )))),
414
415            // ===== Date/time from parts =====
416            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
417            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
418                "DATE_FROM_UNIX_DATE".to_string(),
419                vec![f.this],
420            )))),
421
422            // UnixDate -> UNIX_DATE
423            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
424                "UNIX_DATE".to_string(),
425                vec![f.this],
426            )))),
427
428            // TimestampDiff -> TIMESTAMP_DIFF
429            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
430                "TIMESTAMP_DIFF".to_string(),
431                vec![*f.this, *f.expression],
432            )))),
433
434            // FromTimeZone -> DATETIME
435            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
436                "DATETIME".to_string(),
437                vec![*f.this],
438            )))),
439
440            // TsOrDsToDatetime -> DATETIME
441            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
442                "DATETIME".to_string(),
443                vec![f.this],
444            )))),
445
446            // TsOrDsToTimestamp -> TIMESTAMP
447            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
448                "TIMESTAMP".to_string(),
449                vec![f.this],
450            )))),
451
452            // ===== IfFunc -> IF in BigQuery =====
453            Expression::IfFunc(f) => {
454                let mut args = vec![f.condition, f.true_value];
455                if let Some(false_val) = f.false_value {
456                    args.push(false_val);
457                } else {
458                    args.push(Expression::Null(crate::expressions::Null));
459                }
460                Ok(Expression::Function(Box::new(Function::new(
461                    "IF".to_string(),
462                    args,
463                ))))
464            }
465
466            // ===== HexString -> FROM_HEX =====
467            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
468                "FROM_HEX".to_string(),
469                vec![*f.this],
470            )))),
471
472            // ===== Additional auto-generated transforms from Python sqlglot =====
473            // ApproxTopK -> APPROX_TOP_COUNT
474            Expression::ApproxTopK(f) => {
475                let mut args = vec![*f.this];
476                if let Some(expr) = f.expression {
477                    args.push(*expr);
478                }
479                Ok(Expression::Function(Box::new(Function::new(
480                    "APPROX_TOP_COUNT".to_string(),
481                    args,
482                ))))
483            }
484
485            // SafeDivide -> SAFE_DIVIDE
486            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
487                "SAFE_DIVIDE".to_string(),
488                vec![*f.this, *f.expression],
489            )))),
490
491            // JSONKeysAtDepth -> JSON_KEYS
492            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
493                "JSON_KEYS".to_string(),
494                vec![*f.this],
495            )))),
496
497            // JSONValueArray -> JSON_VALUE_ARRAY
498            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
499                "JSON_VALUE_ARRAY".to_string(),
500                vec![*f.this],
501            )))),
502
503            // DateFromParts -> DATE
504            Expression::DateFromParts(f) => {
505                let mut args = Vec::new();
506                if let Some(y) = f.year {
507                    args.push(*y);
508                }
509                if let Some(m) = f.month {
510                    args.push(*m);
511                }
512                if let Some(d) = f.day {
513                    args.push(*d);
514                }
515                Ok(Expression::Function(Box::new(Function::new(
516                    "DATE".to_string(),
517                    args,
518                ))))
519            }
520
521            // SPLIT: BigQuery defaults to comma separator when none provided
522            // SPLIT(foo) -> SPLIT(foo, ',')
523            Expression::Split(f) => {
524                // Check if delimiter is empty or a placeholder - add default comma
525                let delimiter = match &f.delimiter {
526                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.is_empty()) => {
527                        let Literal::String(_) = lit.as_ref() else { unreachable!() };
528                        Expression::Literal(Box::new(Literal::String(",".to_string())))
529                    }
530                    _ => f.delimiter,
531                };
532                Ok(Expression::Split(Box::new(SplitFunc {
533                    this: f.this,
534                    delimiter,
535                })))
536            }
537
538            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
539            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
540            Expression::Cast(c) => {
541                use crate::expressions::DataType;
542                // Check if casting to JSON - use PARSE_JSON instead
543                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
544                // (parser creates Custom for type literals like JSON 'string')
545                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
546                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
547                if is_json {
548                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
549                }
550                let transformed_type = match self.transform_data_type(c.to)? {
551                    Expression::DataType(dt) => dt,
552                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
553                };
554                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
555                    this: c.this,
556                    to: transformed_type,
557                    trailing_comments: c.trailing_comments,
558                    double_colon_syntax: c.double_colon_syntax,
559                    format: c.format,
560                    default: c.default,
561                    inferred_type: None,
562                })))
563            }
564
565            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
566            Expression::SafeCast(c) => {
567                let transformed_type = match self.transform_data_type(c.to)? {
568                    Expression::DataType(dt) => dt,
569                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
570                };
571                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
572                    this: c.this,
573                    to: transformed_type,
574                    trailing_comments: c.trailing_comments,
575                    double_colon_syntax: c.double_colon_syntax,
576                    format: c.format,
577                    default: c.default,
578                    inferred_type: None,
579                })))
580            }
581
582            // ===== SELECT-level transforms =====
583            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
584            Expression::Select(mut select) => {
585                if select.group_by.is_some() && select.order_by.is_some() {
586                    // Build map: expression → alias name for aliased projections
587                    let aliases: Vec<(Expression, Identifier)> = select
588                        .expressions
589                        .iter()
590                        .filter_map(|e| {
591                            if let Expression::Alias(a) = e {
592                                Some((a.this.clone(), a.alias.clone()))
593                            } else {
594                                None
595                            }
596                        })
597                        .collect();
598
599                    if let Some(ref mut group_by) = select.group_by {
600                        for grouped in group_by.expressions.iter_mut() {
601                            // Skip numeric indices (already aliased)
602                            if matches!(grouped, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_))) {
603                                continue;
604                            }
605                            // Check if this GROUP BY expression matches a SELECT alias
606                            for (expr, alias_ident) in &aliases {
607                                if grouped == expr {
608                                    *grouped = Expression::boxed_column(Column {
609                                        name: alias_ident.clone(),
610                                        table: None,
611                                        join_mark: false,
612                                        trailing_comments: Vec::new(),
613                                        span: None,
614                                        inferred_type: None,
615                                    });
616                                    break;
617                                }
618                            }
619                        }
620                    }
621                }
622                Ok(Expression::Select(select))
623            }
624
625            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
626            Expression::ArrayContains(f) => {
627                let array_expr = f.this;
628                let value_expr = f.expression;
629
630                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
631                let unnest = Expression::Unnest(Box::new(UnnestFunc {
632                    this: array_expr,
633                    expressions: Vec::new(),
634                    with_ordinality: false,
635                    alias: None,
636                    offset_alias: None,
637                }));
638                let aliased_unnest = Expression::Alias(Box::new(Alias {
639                    this: unnest,
640                    alias: Identifier::new("_col"),
641                    column_aliases: Vec::new(),
642                    pre_alias_comments: Vec::new(),
643                    trailing_comments: Vec::new(),
644                    inferred_type: None,
645                }));
646                let col_ref = Expression::boxed_column(Column {
647                    name: Identifier::new("_col"),
648                    table: None,
649                    join_mark: false,
650                    trailing_comments: Vec::new(),
651                    span: None,
652                    inferred_type: None,
653                });
654                let where_clause = Where {
655                    this: Expression::Eq(Box::new(BinaryOp {
656                        left: col_ref,
657                        right: value_expr,
658                        left_comments: Vec::new(),
659                        operator_comments: Vec::new(),
660                        trailing_comments: Vec::new(),
661                        inferred_type: None,
662                    })),
663                };
664                let inner_select = Expression::Select(Box::new(Select {
665                    expressions: vec![Expression::Literal(Box::new(Literal::Number("1".to_string())))],
666                    from: Some(From {
667                        expressions: vec![aliased_unnest],
668                    }),
669                    where_clause: Some(where_clause),
670                    ..Default::default()
671                }));
672                Ok(Expression::Exists(Box::new(Exists {
673                    this: inner_select,
674                    not: false,
675                })))
676            }
677
678            // ===== JSON_OBJECT array form → key-value pairs =====
679            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
680            Expression::JsonObject(mut f) => {
681                if f.pairs.len() == 1 {
682                    // Extract expressions from both Array and ArrayFunc variants
683                    let keys_exprs = match &f.pairs[0].0 {
684                        Expression::Array(arr) => Some(&arr.expressions),
685                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
686                        _ => None,
687                    };
688                    let vals_exprs = match &f.pairs[0].1 {
689                        Expression::Array(arr) => Some(&arr.expressions),
690                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
691                        _ => None,
692                    };
693                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
694                        if keys.len() == vals.len() {
695                            let new_pairs: Vec<(Expression, Expression)> = keys
696                                .iter()
697                                .zip(vals.iter())
698                                .map(|(k, v)| (k.clone(), v.clone()))
699                                .collect();
700                            f.pairs = new_pairs;
701                        }
702                    }
703                }
704                Ok(Expression::JsonObject(f))
705            }
706
707            // ===== MOD function: unwrap unnecessary Paren from first argument =====
708            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
709            Expression::ModFunc(mut f) => {
710                // Unwrap Paren from first argument if present
711                if let Expression::Paren(paren) = f.this {
712                    f.this = paren.this;
713                }
714                Ok(Expression::ModFunc(f))
715            }
716
717            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
718            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
719                let path = match *e.expression {
720                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
721                        let Literal::String(s) = lit.as_ref() else { unreachable!() };
722                        let normalized = if s.starts_with('$') {
723                            s.clone()
724                        } else if s.starts_with('[') {
725                            format!("${}", s)
726                        } else {
727                            format!("$.{}", s)
728                        };
729                        Expression::Literal(Box::new(Literal::String(normalized)))
730                    }
731                    other => other,
732                };
733                Ok(Expression::Function(Box::new(Function::new(
734                    "JSON_EXTRACT".to_string(),
735                    vec![*e.this, path],
736                ))))
737            }
738
739            // Generic function transformations
740            Expression::Function(f) => self.transform_function(*f),
741
742            // Generic aggregate function transformations
743            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
744
745            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
746            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
747            Expression::MethodCall(mc) => self.transform_method_call(*mc),
748
749            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
750            // and set is_table_function = true for TABLE FUNCTION syntax
751            Expression::CreateFunction(mut cf) => {
752                if let Some(ref mut rtb) = cf.returns_table_body {
753                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
754                        // Convert TABLE (...) to TABLE <...> with BigQuery types
755                        let inner = if rtb.starts_with("TABLE (") {
756                            &rtb["TABLE (".len()..rtb.len() - 1]
757                        } else {
758                            &rtb["TABLE(".len()..rtb.len() - 1]
759                        };
760                        // Convert common types to BigQuery equivalents
761                        let converted = inner
762                            .replace(" INT,", " INT64,")
763                            .replace(" INT)", " INT64)")
764                            .replace(" INTEGER,", " INT64,")
765                            .replace(" INTEGER)", " INT64)")
766                            .replace(" FLOAT,", " FLOAT64,")
767                            .replace(" FLOAT)", " FLOAT64)")
768                            .replace(" BOOLEAN,", " BOOL,")
769                            .replace(" BOOLEAN)", " BOOL)")
770                            .replace(" VARCHAR", " STRING")
771                            .replace(" TEXT", " STRING");
772                        // Handle trailing type (no comma, no paren)
773                        let converted = if converted.ends_with(" INT") {
774                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
775                        } else {
776                            converted
777                        };
778                        *rtb = format!("TABLE <{}>", converted);
779                        cf.is_table_function = true;
780                    }
781                }
782                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
783                if cf.is_table_function {
784                    if let Some(ref body) = cf.body {
785                        if matches!(body, FunctionBody::StringLiteral(_)) {
786                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
787                                // Parse the SQL string into an expression
788                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
789                                    if let Some(stmt) = parsed.into_iter().next() {
790                                        cf.body = Some(FunctionBody::Expression(stmt));
791                                    } else {
792                                        cf.body = Some(FunctionBody::StringLiteral(sql));
793                                    }
794                                } else {
795                                    cf.body = Some(FunctionBody::StringLiteral(sql));
796                                }
797                            }
798                        }
799                    }
800                }
801                Ok(Expression::CreateFunction(cf))
802            }
803
804            // Pass through everything else
805            _ => Ok(expr),
806        }
807    }
808}
809
810impl BigQueryDialect {
811    /// Transform data types according to BigQuery TYPE_MAPPING
812    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
813        use crate::expressions::DataType;
814        let transformed = match dt {
815            // BIGINT -> INT64
816            DataType::BigInt { .. } => DataType::Custom {
817                name: "INT64".to_string(),
818            },
819            // INT -> INT64
820            DataType::Int { .. } => DataType::Custom {
821                name: "INT64".to_string(),
822            },
823            // SMALLINT -> INT64
824            DataType::SmallInt { .. } => DataType::Custom {
825                name: "INT64".to_string(),
826            },
827            // TINYINT -> INT64
828            DataType::TinyInt { .. } => DataType::Custom {
829                name: "INT64".to_string(),
830            },
831            // FLOAT -> FLOAT64
832            DataType::Float { .. } => DataType::Custom {
833                name: "FLOAT64".to_string(),
834            },
835            // DOUBLE -> FLOAT64
836            DataType::Double { .. } => DataType::Custom {
837                name: "FLOAT64".to_string(),
838            },
839            // BOOLEAN -> BOOL
840            DataType::Boolean => DataType::Custom {
841                name: "BOOL".to_string(),
842            },
843            // CHAR -> STRING
844            DataType::Char { .. } => DataType::Custom {
845                name: "STRING".to_string(),
846            },
847            // VARCHAR -> STRING
848            DataType::VarChar { .. } => DataType::Custom {
849                name: "STRING".to_string(),
850            },
851            // TEXT -> STRING
852            DataType::Text => DataType::Custom {
853                name: "STRING".to_string(),
854            },
855            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
856            DataType::String { .. } => DataType::Custom {
857                name: "STRING".to_string(),
858            },
859            // BINARY -> BYTES
860            DataType::Binary { .. } => DataType::Custom {
861                name: "BYTES".to_string(),
862            },
863            // VARBINARY -> BYTES
864            DataType::VarBinary { .. } => DataType::Custom {
865                name: "BYTES".to_string(),
866            },
867            // BLOB -> BYTES
868            DataType::Blob => DataType::Custom {
869                name: "BYTES".to_string(),
870            },
871            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
872            DataType::Decimal { .. } => DataType::Custom {
873                name: "NUMERIC".to_string(),
874            },
875            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
876            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
877            // be converted to DATETIME by the generator
878            DataType::Timestamp {
879                timezone: false, ..
880            } => DataType::Custom {
881                name: "TIMESTAMP".to_string(),
882            },
883            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
884                name: "TIMESTAMP".to_string(),
885            },
886            // UUID -> STRING (BigQuery doesn't have native UUID type)
887            DataType::Uuid => DataType::Custom {
888                name: "STRING".to_string(),
889            },
890            // RECORD -> STRUCT in BigQuery
891            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
892                DataType::Custom {
893                    name: "STRUCT".to_string(),
894                }
895            }
896            // TIMESTAMPTZ (custom) -> TIMESTAMP
897            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
898                DataType::Custom {
899                    name: "TIMESTAMP".to_string(),
900                }
901            }
902            // BYTEINT (custom) -> INT64
903            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
904                DataType::Custom {
905                    name: "INT64".to_string(),
906                }
907            }
908            // Keep all other types as-is
909            other => other,
910        };
911        Ok(Expression::DataType(transformed))
912    }
913
914    fn transform_function(&self, f: Function) -> Result<Expression> {
915        let name_upper = f.name.to_uppercase();
916        match name_upper.as_str() {
917            // IFNULL -> COALESCE (both work in BigQuery)
918            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
919                original_name: None,
920                expressions: f.args,
921                inferred_type: None,
922            }))),
923
924            // NVL -> COALESCE
925            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
926                original_name: None,
927                expressions: f.args,
928                inferred_type: None,
929            }))),
930
931            // ISNULL -> COALESCE
932            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
933                original_name: None,
934                expressions: f.args,
935                inferred_type: None,
936            }))),
937
938            // GROUP_CONCAT -> STRING_AGG in BigQuery
939            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
940                Function::new("STRING_AGG".to_string(), f.args),
941            ))),
942
943            // SUBSTR -> SUBSTRING (both work)
944            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
945                "SUBSTRING".to_string(),
946                f.args,
947            )))),
948
949            // RANDOM -> RAND
950            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
951                seed: None,
952                lower: None,
953                upper: None,
954            }))),
955
956            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
957            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
958            "CURRENT_DATE" if f.args.is_empty() => {
959                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
960            }
961            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
962                name: "CURRENT_DATE".to_string(),
963                args: f.args,
964                distinct: false,
965                trailing_comments: Vec::new(),
966                use_bracket_syntax: false,
967                no_parens: false,
968                quoted: false,
969                span: None,
970                inferred_type: None,
971            }))),
972
973            // NOW -> CURRENT_TIMESTAMP in BigQuery
974            "NOW" => Ok(Expression::CurrentTimestamp(
975                crate::expressions::CurrentTimestamp {
976                    precision: None,
977                    sysdate: false,
978                },
979            )),
980
981            // TO_DATE -> PARSE_DATE in BigQuery
982            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
983                "PARSE_DATE".to_string(),
984                f.args,
985            )))),
986
987            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
988            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
989                "PARSE_TIMESTAMP".to_string(),
990                f.args,
991            )))),
992
993            // TO_TIME -> TIME in BigQuery
994            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
995                "TIME".to_string(),
996                f.args,
997            )))),
998
999            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
1000            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
1001                "FORMAT_DATE".to_string(),
1002                f.args,
1003            )))),
1004
1005            // POSITION -> STRPOS in BigQuery
1006            // BigQuery uses STRPOS(string, substring)
1007            "POSITION" if f.args.len() == 2 => {
1008                let mut args = f.args;
1009                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1010                let first = args.remove(0);
1011                let second = args.remove(0);
1012                Ok(Expression::Function(Box::new(Function::new(
1013                    "STRPOS".to_string(),
1014                    vec![second, first],
1015                ))))
1016            }
1017
1018            // LEN -> LENGTH
1019            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1020                f.args.into_iter().next().unwrap(),
1021            )))),
1022
1023            // CEILING -> CEIL (both work)
1024            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1025                this: f.args.into_iter().next().unwrap(),
1026                decimals: None,
1027                to: None,
1028            }))),
1029
1030            // GETDATE -> CURRENT_TIMESTAMP
1031            "GETDATE" => Ok(Expression::CurrentTimestamp(
1032                crate::expressions::CurrentTimestamp {
1033                    precision: None,
1034                    sysdate: false,
1035                },
1036            )),
1037
1038            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1039            // CARDINALITY -> ARRAY_LENGTH
1040            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1041                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1042            ))),
1043
1044            // UNNEST is native to BigQuery
1045
1046            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1047            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1048                "GENERATE_ARRAY".to_string(),
1049                f.args,
1050            )))),
1051
1052            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1053            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1054            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1055                "APPROX_COUNT_DISTINCT".to_string(),
1056                f.args,
1057            )))),
1058
1059            // COUNT_IF -> COUNTIF in BigQuery
1060            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1061                "COUNTIF".to_string(),
1062                f.args,
1063            )))),
1064
1065            // SHA1 -> SHA1 (native), SHA -> SHA1
1066            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1067                "SHA1".to_string(),
1068                f.args,
1069            )))),
1070
1071            // SHA256/SHA2 -> SHA256
1072            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1073                "SHA256".to_string(),
1074                f.args,
1075            )))),
1076
1077            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1078            // TO_HEX(MD5(x)) pattern
1079            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1080                "MD5".to_string(),
1081                f.args,
1082            )))),
1083
1084            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1085            // VAR_POP -> VAR_POP (native)
1086
1087            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1088            "DATEADD" if f.args.len() == 3 => {
1089                let mut args = f.args;
1090                let unit_expr = args.remove(0);
1091                let amount = args.remove(0);
1092                let date = args.remove(0);
1093                // Convert unit identifier to IntervalUnit
1094                let unit_name = match &unit_expr {
1095                    Expression::Identifier(id) => id.name.to_uppercase(),
1096                    _ => "DAY".to_string(),
1097                };
1098                let unit = match unit_name.as_str() {
1099                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1100                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1101                        crate::expressions::IntervalUnit::Quarter
1102                    }
1103                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1104                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1105                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1106                        crate::expressions::IntervalUnit::Day
1107                    }
1108                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1109                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1110                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1111                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1112                        crate::expressions::IntervalUnit::Millisecond
1113                    }
1114                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1115                        crate::expressions::IntervalUnit::Microsecond
1116                    }
1117                    _ => crate::expressions::IntervalUnit::Day,
1118                };
1119                Ok(Expression::DateAdd(Box::new(
1120                    crate::expressions::DateAddFunc {
1121                        this: date,
1122                        interval: amount,
1123                        unit,
1124                    },
1125                )))
1126            }
1127            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1128                "DATE_ADD".to_string(),
1129                f.args,
1130            )))),
1131
1132            // DATE_DIFF in BigQuery (native)
1133            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1134                "DATE_DIFF".to_string(),
1135                f.args,
1136            )))),
1137
1138            // TIMESTAMP_DIFF in BigQuery
1139            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1140                "TIMESTAMP_DIFF".to_string(),
1141                f.args,
1142            )))),
1143
1144            // TIME -> TIME (native)
1145            // DATETIME -> DATETIME (native)
1146
1147            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1148
1149            // NEWID/UUID -> GENERATE_UUID
1150            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1151                "GENERATE_UUID".to_string(),
1152                vec![],
1153            )))),
1154
1155            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1156            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1157                "EDIT_DISTANCE".to_string(),
1158                f.args,
1159            )))),
1160
1161            // UNIX_TIMESTAMP -> UNIX_SECONDS
1162            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1163                "UNIX_SECONDS".to_string(),
1164                f.args,
1165            )))),
1166
1167            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1168            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1169                "TIMESTAMP_SECONDS".to_string(),
1170                f.args,
1171            )))),
1172
1173            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1174            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1175                Function::new("LENGTH".to_string(), f.args),
1176            ))),
1177
1178            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1179            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1180                "BYTE_LENGTH".to_string(),
1181                f.args,
1182            )))),
1183
1184            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1185            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1186                "JSON_VALUE_ARRAY".to_string(),
1187                f.args,
1188            )))),
1189
1190            // INSTR is native to BigQuery
1191
1192            // SPLIT: BigQuery defaults to comma separator when none provided
1193            // SPLIT(foo) -> SPLIT(foo, ',')
1194            "SPLIT" if f.args.len() == 1 => {
1195                let mut args = f.args;
1196                args.push(Expression::Literal(Box::new(Literal::String(",".to_string()))));
1197                Ok(Expression::Split(Box::new(SplitFunc {
1198                    this: args.remove(0),
1199                    delimiter: args.remove(0),
1200                })))
1201            }
1202
1203            // SPLIT with two args - convert to Split expression
1204            "SPLIT" if f.args.len() == 2 => {
1205                let mut args = f.args;
1206                Ok(Expression::Split(Box::new(SplitFunc {
1207                    this: args.remove(0),
1208                    delimiter: args.remove(0),
1209                })))
1210            }
1211
1212            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1213            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1214                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1215                let args = if f.args.len() > 4 {
1216                    f.args[..4].to_vec()
1217                } else {
1218                    f.args
1219                };
1220                Ok(Expression::Function(Box::new(Function::new(
1221                    "REGEXP_EXTRACT".to_string(),
1222                    args,
1223                ))))
1224            }
1225            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1226                "REGEXP_EXTRACT".to_string(),
1227                f.args,
1228            )))),
1229
1230            // REGEXP_REPLACE - strip extra Snowflake-specific args
1231            "REGEXP_REPLACE" if f.args.len() > 3 => {
1232                let args = f.args[..3].to_vec();
1233                Ok(Expression::Function(Box::new(Function::new(
1234                    "REGEXP_REPLACE".to_string(),
1235                    args,
1236                ))))
1237            }
1238
1239            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1240            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1241                "JSON_OBJECT".to_string(),
1242                f.args,
1243            )))),
1244
1245            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1246            "EDITDISTANCE" if f.args.len() == 3 => {
1247                let col1 = f.args[0].clone();
1248                let col2 = f.args[1].clone();
1249                let max_dist = f.args[2].clone();
1250                Ok(Expression::Function(Box::new(Function::new(
1251                    "EDIT_DISTANCE".to_string(),
1252                    vec![
1253                        col1,
1254                        col2,
1255                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1256                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1257                            value: max_dist,
1258                            separator: crate::expressions::NamedArgSeparator::DArrow,
1259                        })),
1260                    ],
1261                ))))
1262            }
1263            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1264                Function::new("EDIT_DISTANCE".to_string(), f.args),
1265            ))),
1266
1267            // HEX_DECODE_BINARY -> FROM_HEX
1268            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1269                "FROM_HEX".to_string(),
1270                f.args,
1271            )))),
1272
1273            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1274            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1275            "PARSE_DATE"
1276            | "PARSE_DATETIME"
1277            | "PARSE_TIMESTAMP"
1278            | "SAFE.PARSE_DATE"
1279            | "SAFE.PARSE_DATETIME"
1280            | "SAFE.PARSE_TIMESTAMP" => {
1281                let args = self.normalize_time_format_args(f.args);
1282                Ok(Expression::Function(Box::new(Function {
1283                    name: f.name,
1284                    args,
1285                    distinct: f.distinct,
1286                    no_parens: f.no_parens,
1287                    trailing_comments: f.trailing_comments,
1288                    quoted: f.quoted,
1289                    use_bracket_syntax: f.use_bracket_syntax,
1290                    span: None,
1291                    inferred_type: None,
1292                })))
1293            }
1294
1295            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1296            "GET_PATH" if f.args.len() == 2 => {
1297                let mut args = f.args;
1298                let this = args.remove(0);
1299                let path = args.remove(0);
1300                let json_path = match &path {
1301                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1302                        let Literal::String(s) = lit.as_ref() else { unreachable!() };
1303                        let normalized = if s.starts_with('$') {
1304                            s.clone()
1305                        } else if s.starts_with('[') {
1306                            format!("${}", s)
1307                        } else {
1308                            format!("$.{}", s)
1309                        };
1310                        Expression::Literal(Box::new(Literal::String(normalized)))
1311                    }
1312                    _ => path,
1313                };
1314                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1315                    this,
1316                    path: json_path,
1317                    returning: None,
1318                    arrow_syntax: false,
1319                    hash_arrow_syntax: false,
1320                    wrapper_option: None,
1321                    quotes_option: None,
1322                    on_scalar_string: false,
1323                    on_error: None,
1324                })))
1325            }
1326
1327            // Pass through everything else
1328            _ => Ok(Expression::Function(Box::new(f))),
1329        }
1330    }
1331
1332    fn transform_aggregate_function(
1333        &self,
1334        f: Box<crate::expressions::AggregateFunction>,
1335    ) -> Result<Expression> {
1336        let name_upper = f.name.to_uppercase();
1337        match name_upper.as_str() {
1338            // GROUP_CONCAT -> STRING_AGG
1339            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1340                Function::new("STRING_AGG".to_string(), f.args),
1341            ))),
1342
1343            // Pass through everything else
1344            _ => Ok(Expression::AggregateFunction(f)),
1345        }
1346    }
1347
1348    /// Transform MethodCall expressions
1349    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1350    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1351        use crate::expressions::MethodCall;
1352
1353        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1354        if let Expression::Column(ref col) = mc.this {
1355            if col.name.name.eq_ignore_ascii_case("SAFE") {
1356                let method_upper = mc.method.name.to_uppercase();
1357                if method_upper == "PARSE_DATE"
1358                    || method_upper == "PARSE_DATETIME"
1359                    || method_upper == "PARSE_TIMESTAMP"
1360                {
1361                    // Normalize the format string in the first argument
1362                    let args = self.normalize_time_format_args(mc.args);
1363                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1364                        this: mc.this,
1365                        method: mc.method,
1366                        args,
1367                    })));
1368                }
1369            }
1370        }
1371
1372        // Pass through all other method calls
1373        Ok(Expression::MethodCall(Box::new(mc)))
1374    }
1375
1376    /// Normalize time format strings in function arguments
1377    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1378    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1379        args.into_iter()
1380            .enumerate()
1381            .map(|(i, arg)| {
1382                // Only transform the first argument (the format string)
1383                if i == 0 {
1384                    if let Expression::Literal(ref lit) = arg {
1385                        if let Literal::String(s) = lit.as_ref() {
1386                            let normalized = self.normalize_time_format(&s);
1387                            return Expression::Literal(Box::new(Literal::String(normalized)));
1388                        }
1389                    }
1390                }
1391                arg
1392            })
1393            .collect()
1394    }
1395
1396    /// Normalize a time format string according to BigQuery conventions
1397    /// %Y-%m-%d -> %F (ISO date)
1398    /// %H:%M:%S -> %T (time)
1399    fn normalize_time_format(&self, format: &str) -> String {
1400        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1401    }
1402}
1403
1404#[cfg(test)]
1405mod tests {
1406    use super::*;
1407    use crate::dialects::Dialect;
1408    use crate::parse_one;
1409
1410    fn transpile_to_bigquery(sql: &str) -> String {
1411        let dialect = Dialect::get(DialectType::Generic);
1412        let result = dialect
1413            .transpile_to(sql, DialectType::BigQuery)
1414            .expect("Transpile failed");
1415        result[0].clone()
1416    }
1417
1418    #[test]
1419    fn test_ifnull_identity() {
1420        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1421        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1422        assert!(
1423            result.contains("COALESCE"),
1424            "Expected COALESCE, got: {}",
1425            result
1426        );
1427    }
1428
1429    #[test]
1430    fn test_nvl_to_ifnull() {
1431        // NVL is converted to IFNULL in BigQuery
1432        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1433        assert!(
1434            result.contains("IFNULL"),
1435            "Expected IFNULL, got: {}",
1436            result
1437        );
1438    }
1439
1440    #[test]
1441    fn test_try_cast_to_safe_cast() {
1442        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1443        assert!(
1444            result.contains("SAFE_CAST"),
1445            "Expected SAFE_CAST, got: {}",
1446            result
1447        );
1448    }
1449
1450    #[test]
1451    fn test_random_to_rand() {
1452        let result = transpile_to_bigquery("SELECT RANDOM()");
1453        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1454    }
1455
1456    #[test]
1457    fn test_basic_select() {
1458        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1459        assert!(result.contains("SELECT"));
1460        assert!(result.contains("FROM users"));
1461    }
1462
1463    #[test]
1464    fn test_group_concat_to_string_agg() {
1465        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1466        assert!(
1467            result.contains("STRING_AGG"),
1468            "Expected STRING_AGG, got: {}",
1469            result
1470        );
1471    }
1472
1473    #[test]
1474    fn test_generate_series_to_generate_array() {
1475        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1476        assert!(
1477            result.contains("GENERATE_ARRAY"),
1478            "Expected GENERATE_ARRAY, got: {}",
1479            result
1480        );
1481    }
1482
1483    #[test]
1484    fn test_backtick_identifiers() {
1485        // BigQuery uses backticks for identifiers
1486        let dialect = BigQueryDialect;
1487        let config = dialect.generator_config();
1488        assert_eq!(config.identifier_quote, '`');
1489    }
1490
1491    fn bigquery_identity(sql: &str, expected: &str) {
1492        let dialect = Dialect::get(DialectType::BigQuery);
1493        let ast = dialect.parse(sql).expect("Parse failed");
1494        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1495        let result = dialect.generate(&transformed).expect("Generate failed");
1496        assert_eq!(result, expected, "SQL: {}", sql);
1497    }
1498
1499    #[test]
1500    fn test_cast_char_to_string() {
1501        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1502    }
1503
1504    #[test]
1505    fn test_cast_varchar_to_string() {
1506        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1507    }
1508
1509    #[test]
1510    fn test_cast_nchar_to_string() {
1511        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1512    }
1513
1514    #[test]
1515    fn test_cast_nvarchar_to_string() {
1516        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1517    }
1518
1519    #[test]
1520    fn test_cast_timestamptz_to_timestamp() {
1521        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1522    }
1523
1524    #[test]
1525    fn test_cast_record_to_struct() {
1526        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1527    }
1528
1529    #[test]
1530    fn test_json_literal_to_parse_json() {
1531        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1532        bigquery_identity(
1533            "SELECT JSON '\"foo\"' AS json_data",
1534            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1535        );
1536    }
1537
1538    #[test]
1539    fn test_grant_as_alias_not_quoted() {
1540        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1541        bigquery_identity(
1542            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1543            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1544        );
1545    }
1546
1547    #[test]
1548    fn test_timestamp_literal_to_cast() {
1549        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1550        bigquery_identity(
1551            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1552            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1553        );
1554    }
1555
1556    #[test]
1557    fn test_date_literal_to_cast_in_extract() {
1558        // Issue 1: DATE literal should become CAST syntax in BigQuery
1559        bigquery_identity(
1560            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1561            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1562        );
1563    }
1564
1565    #[test]
1566    fn test_json_object_with_json_literals() {
1567        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1568        bigquery_identity(
1569            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1570            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1571        );
1572    }
1573
1574    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1575    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1576    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1577
1578    #[test]
1579    fn test_safe_parse_date_format_normalization() {
1580        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1581        bigquery_identity(
1582            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1583            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1584        );
1585    }
1586
1587    #[test]
1588    fn test_safe_parse_datetime_format_normalization() {
1589        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1590        bigquery_identity(
1591            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1592            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1593        );
1594    }
1595
1596    #[test]
1597    fn test_safe_parse_timestamp_format_normalization() {
1598        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1599        bigquery_identity(
1600            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1601            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1602        );
1603    }
1604
1605    #[test]
1606    fn test_datetime_literal_to_cast() {
1607        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1608        bigquery_identity(
1609            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1610            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1611        );
1612    }
1613
1614    #[test]
1615    fn test_last_day_week_modifier_stripped() {
1616        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1617        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1618    }
1619
1620    #[test]
1621    fn test_hash_line_comment_parses() {
1622        // Regression test for issue #38:
1623        // BigQuery should accept # as a single-line comment.
1624        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1625        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1626    }
1627}