Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        // BigQuery supports # as single-line comments.
52        config.hash_comments = true;
53        config
54    }
55
56    fn generator_config(&self) -> GeneratorConfig {
57        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58        GeneratorConfig {
59            identifier_quote: '`',
60            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61            dialect: Some(DialectType::BigQuery),
62            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
63            normalize_functions: NormalizeFunctions::None,
64            // BigQuery-specific settings from Python sqlglot
65            interval_allows_plural_form: false,
66            join_hints: false,
67            query_hints: false,
68            table_hints: false,
69            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70            rename_table_with_db: false,
71            nvl2_supported: false,
72            unnest_with_ordinality: false,
73            collate_is_func: true,
74            limit_only_literals: true,
75            supports_table_alias_columns: false,
76            unpivot_aliases_are_identifiers: false,
77            json_key_value_pair_sep: ",",
78            null_ordering_supported: false,
79            ignore_nulls_in_func: true,
80            json_path_single_quote_escape: true,
81            can_implement_array_any: true,
82            supports_to_number: false,
83            named_placeholder_token: "@",
84            hex_func: "TO_HEX",
85            with_properties_prefix: "OPTIONS",
86            supports_exploding_projections: false,
87            except_intersect_support_all_clause: false,
88            supports_unix_seconds: true,
89            // BigQuery uses SAFE_ prefix for safe operations
90            try_supported: true,
91            // BigQuery does not support SEMI/ANTI JOIN syntax
92            semi_anti_join_with_side: false,
93            ..Default::default()
94        }
95    }
96
97    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98        match expr {
99            // ===== Data Type Mappings =====
100            Expression::DataType(dt) => self.transform_data_type(dt),
101
102            // ===== Null handling =====
103            // IFNULL is native to BigQuery - keep as-is for identity
104            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
107            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109            // Coalesce stays as Coalesce
110            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112            // ===== String aggregation =====
113            // GROUP_CONCAT -> STRING_AGG in BigQuery
114            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115                this: f.this,
116                separator: f.separator,
117                order_by: f.order_by,
118                distinct: f.distinct,
119                filter: f.filter,
120                limit: None,
121                inferred_type: None,
122            }))),
123
124            // ===== Cast operations =====
125            // Cast data types are transformed by transform_recursive in mod.rs
126            // which calls transform_data_type via transform_expr(Expression::DataType(...))
127
128            // TryCast -> SafeCast in BigQuery with type transformation
129            Expression::TryCast(c) => {
130                let transformed_type = match self.transform_data_type(c.to)? {
131                    Expression::DataType(dt) => dt,
132                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
133                };
134                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
135                    this: c.this,
136                    to: transformed_type,
137                    trailing_comments: c.trailing_comments,
138                    double_colon_syntax: c.double_colon_syntax,
139                    format: c.format,
140                    default: c.default,
141                    inferred_type: None,
142                })))
143            }
144
145            // ===== Pattern matching =====
146            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
147            Expression::ILike(op) => {
148                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
149                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
150                Ok(Expression::Like(Box::new(LikeOp {
151                    left: lower_left,
152                    right: lower_right,
153                    escape: op.escape,
154                    quantifier: op.quantifier,
155                    inferred_type: None,
156                })))
157            }
158
159            // RegexpLike -> REGEXP_CONTAINS in BigQuery
160            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
161                "REGEXP_CONTAINS".to_string(),
162                vec![f.this, f.pattern],
163            )))),
164
165            // ===== Array operations =====
166            // EXPLODE -> UNNEST in BigQuery
167            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
168                crate::expressions::UnnestFunc {
169                    this: f.this,
170                    expressions: Vec::new(),
171                    with_ordinality: false,
172                    alias: None,
173                    offset_alias: None,
174                },
175            ))),
176
177            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
178            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
179                crate::expressions::UnnestFunc {
180                    this: f.this,
181                    expressions: Vec::new(),
182                    with_ordinality: false,
183                    alias: None,
184                    offset_alias: None,
185                },
186            ))),
187
188            // GenerateSeries -> GENERATE_ARRAY in BigQuery
189            Expression::GenerateSeries(f) => {
190                let mut args = Vec::new();
191                if let Some(start) = f.start {
192                    args.push(*start);
193                }
194                if let Some(end) = f.end {
195                    args.push(*end);
196                }
197                if let Some(step) = f.step {
198                    args.push(*step);
199                }
200                Ok(Expression::Function(Box::new(Function::new(
201                    "GENERATE_ARRAY".to_string(),
202                    args,
203                ))))
204            }
205
206            // ===== Bitwise operations =====
207            // BitwiseAndAgg -> BIT_AND
208            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
209                "BIT_AND".to_string(),
210                vec![f.this],
211            )))),
212
213            // BitwiseOrAgg -> BIT_OR
214            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
215                "BIT_OR".to_string(),
216                vec![f.this],
217            )))),
218
219            // BitwiseXorAgg -> BIT_XOR
220            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
221                "BIT_XOR".to_string(),
222                vec![f.this],
223            )))),
224
225            // BitwiseCount -> BIT_COUNT
226            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
227                "BIT_COUNT".to_string(),
228                vec![f.this],
229            )))),
230
231            // ByteLength -> BYTE_LENGTH
232            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
233                "BYTE_LENGTH".to_string(),
234                vec![f.this],
235            )))),
236
237            // IntDiv -> DIV
238            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
239                "DIV".to_string(),
240                vec![f.this, f.expression],
241            )))),
242
243            // Int64 -> INT64
244            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
245                "INT64".to_string(),
246                vec![f.this],
247            )))),
248
249            // ===== Random =====
250            // RANDOM -> RAND in BigQuery
251            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
252                seed: None,
253                lower: None,
254                upper: None,
255            }))),
256
257            // ===== UUID =====
258            // Uuid -> GENERATE_UUID in BigQuery
259            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
260                "GENERATE_UUID".to_string(),
261                vec![],
262            )))),
263
264            // ===== Approximate functions =====
265            // ApproxDistinct -> APPROX_COUNT_DISTINCT
266            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
267                "APPROX_COUNT_DISTINCT".to_string(),
268                vec![f.this],
269            )))),
270
271            // ArgMax -> MAX_BY in BigQuery
272            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
273                "MAX_BY".to_string(),
274                vec![*f.this, *f.expression],
275            )))),
276
277            // ArgMin -> MIN_BY in BigQuery
278            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
279                "MIN_BY".to_string(),
280                vec![*f.this, *f.expression],
281            )))),
282
283            // ===== Conditional =====
284            // CountIf -> COUNTIF in BigQuery
285            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
286                "COUNTIF".to_string(),
287                vec![f.this],
288            )))),
289
290            // ===== String functions =====
291            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
292            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
293
294            // ===== Conversion =====
295            // Unhex -> FROM_HEX
296            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
297                "FROM_HEX".to_string(),
298                vec![*f.this],
299            )))),
300
301            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
302            Expression::UnixToTime(f) => {
303                let scale = f.scale.unwrap_or(0);
304                match scale {
305                    0 => Ok(Expression::Function(Box::new(Function::new(
306                        "TIMESTAMP_SECONDS".to_string(),
307                        vec![*f.this],
308                    )))),
309                    3 => Ok(Expression::Function(Box::new(Function::new(
310                        "TIMESTAMP_MILLIS".to_string(),
311                        vec![*f.this],
312                    )))),
313                    6 => Ok(Expression::Function(Box::new(Function::new(
314                        "TIMESTAMP_MICROS".to_string(),
315                        vec![*f.this],
316                    )))),
317                    _ => {
318                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
319                        let div_expr =
320                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
321                                *f.this,
322                                Expression::Function(Box::new(Function::new(
323                                    "POWER".to_string(),
324                                    vec![Expression::number(10), Expression::number(scale)],
325                                ))),
326                            )));
327                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
328                            this: div_expr,
329                            to: crate::expressions::DataType::Custom {
330                                name: "INT64".to_string(),
331                            },
332                            double_colon_syntax: false,
333                            trailing_comments: vec![],
334                            format: None,
335                            default: None,
336                            inferred_type: None,
337                        }));
338                        Ok(Expression::Function(Box::new(Function::new(
339                            "TIMESTAMP_SECONDS".to_string(),
340                            vec![cast_expr],
341                        ))))
342                    }
343                }
344            }
345
346            // ===== Date/time =====
347            // DateDiff -> DATE_DIFF in BigQuery
348            Expression::DateDiff(f) => {
349                // BigQuery: DATE_DIFF(date1, date2, part)
350                let unit_str = match f.unit {
351                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
352                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
353                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
354                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
355                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
356                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
357                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
358                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
359                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
360                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
361                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
362                    None => "DAY",
363                };
364                let unit = Expression::Identifier(crate::expressions::Identifier {
365                    name: unit_str.to_string(),
366                    quoted: false,
367                    trailing_comments: Vec::new(),
368                    span: None,
369                });
370                Ok(Expression::Function(Box::new(Function::new(
371                    "DATE_DIFF".to_string(),
372                    vec![f.this, f.expression, unit],
373                ))))
374            }
375
376            // ===== Variance =====
377            // VarPop -> VAR_POP
378            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
379                "VAR_POP".to_string(),
380                vec![f.this],
381            )))),
382
383            // ===== Hash functions =====
384            // SHA -> SHA1
385            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
386                "SHA1".to_string(),
387                vec![f.this],
388            )))),
389
390            // SHA1Digest -> SHA1
391            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
392                "SHA1".to_string(),
393                vec![f.this],
394            )))),
395
396            // MD5Digest -> MD5
397            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
398                "MD5".to_string(),
399                vec![*f.this],
400            )))),
401
402            // ===== Type conversion =====
403            // JSONBool -> BOOL
404            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
405                "BOOL".to_string(),
406                vec![f.this],
407            )))),
408
409            // StringFunc -> STRING
410            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
411                "STRING".to_string(),
412                vec![*f.this],
413            )))),
414
415            // ===== Date/time from parts =====
416            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
417            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
418                "DATE_FROM_UNIX_DATE".to_string(),
419                vec![f.this],
420            )))),
421
422            // UnixDate -> UNIX_DATE
423            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
424                "UNIX_DATE".to_string(),
425                vec![f.this],
426            )))),
427
428            // TimestampDiff -> TIMESTAMP_DIFF
429            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
430                "TIMESTAMP_DIFF".to_string(),
431                vec![*f.this, *f.expression],
432            )))),
433
434            // FromTimeZone -> DATETIME
435            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
436                "DATETIME".to_string(),
437                vec![*f.this],
438            )))),
439
440            // TsOrDsToDatetime -> DATETIME
441            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
442                "DATETIME".to_string(),
443                vec![f.this],
444            )))),
445
446            // TsOrDsToTimestamp -> TIMESTAMP
447            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
448                "TIMESTAMP".to_string(),
449                vec![f.this],
450            )))),
451
452            // ===== IfFunc -> IF in BigQuery =====
453            Expression::IfFunc(f) => {
454                let mut args = vec![f.condition, f.true_value];
455                if let Some(false_val) = f.false_value {
456                    args.push(false_val);
457                } else {
458                    args.push(Expression::Null(crate::expressions::Null));
459                }
460                Ok(Expression::Function(Box::new(Function::new(
461                    "IF".to_string(),
462                    args,
463                ))))
464            }
465
466            // ===== HexString -> FROM_HEX =====
467            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
468                "FROM_HEX".to_string(),
469                vec![*f.this],
470            )))),
471
472            // ===== Additional auto-generated transforms from Python sqlglot =====
473            // ApproxTopK -> APPROX_TOP_COUNT
474            Expression::ApproxTopK(f) => {
475                let mut args = vec![*f.this];
476                if let Some(expr) = f.expression {
477                    args.push(*expr);
478                }
479                Ok(Expression::Function(Box::new(Function::new(
480                    "APPROX_TOP_COUNT".to_string(),
481                    args,
482                ))))
483            }
484
485            // SafeDivide -> SAFE_DIVIDE
486            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
487                "SAFE_DIVIDE".to_string(),
488                vec![*f.this, *f.expression],
489            )))),
490
491            // JSONKeysAtDepth -> JSON_KEYS
492            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
493                "JSON_KEYS".to_string(),
494                vec![*f.this],
495            )))),
496
497            // JSONValueArray -> JSON_VALUE_ARRAY
498            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
499                "JSON_VALUE_ARRAY".to_string(),
500                vec![*f.this],
501            )))),
502
503            // DateFromParts -> DATE
504            Expression::DateFromParts(f) => {
505                let mut args = Vec::new();
506                if let Some(y) = f.year {
507                    args.push(*y);
508                }
509                if let Some(m) = f.month {
510                    args.push(*m);
511                }
512                if let Some(d) = f.day {
513                    args.push(*d);
514                }
515                Ok(Expression::Function(Box::new(Function::new(
516                    "DATE".to_string(),
517                    args,
518                ))))
519            }
520
521            // SPLIT: BigQuery defaults to comma separator when none provided
522            // SPLIT(foo) -> SPLIT(foo, ',')
523            Expression::Split(f) => {
524                // Check if delimiter is empty or a placeholder - add default comma
525                let delimiter = match &f.delimiter {
526                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.is_empty()) =>
527                    {
528                        let Literal::String(_) = lit.as_ref() else {
529                            unreachable!()
530                        };
531                        Expression::Literal(Box::new(Literal::String(",".to_string())))
532                    }
533                    _ => f.delimiter,
534                };
535                Ok(Expression::Split(Box::new(SplitFunc {
536                    this: f.this,
537                    delimiter,
538                })))
539            }
540
541            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
542            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
543            // Special case: CAST(x AS temporal FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP
544            Expression::Cast(c) => {
545                use crate::expressions::DataType;
546                // Check if casting to JSON - use PARSE_JSON instead
547                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
548                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
549                if is_json {
550                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
551                }
552                // CAST(x AS temporal_type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP(strftime_fmt, x)
553                if c.format.is_some() {
554                    let is_temporal = matches!(
555                        c.to,
556                        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
557                    ) || matches!(&c.to, DataType::Custom { name } if
558                        name.eq_ignore_ascii_case("TIMESTAMP") ||
559                        name.eq_ignore_ascii_case("DATE") ||
560                        name.eq_ignore_ascii_case("DATETIME") ||
561                        name.eq_ignore_ascii_case("TIME")
562                    );
563                    if is_temporal {
564                        let format_expr = c.format.as_ref().unwrap().as_ref();
565                        // Extract the actual format expr and timezone (if AT TIME ZONE is present)
566                        let (actual_format, timezone) = match format_expr {
567                            Expression::AtTimeZone(ref atz) => {
568                                (atz.this.clone(), Some(atz.zone.clone()))
569                            }
570                            _ => (format_expr.clone(), None),
571                        };
572                        let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format);
573                        let func_name = match &c.to {
574                            DataType::Date => "PARSE_DATE",
575                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATE") => {
576                                "PARSE_DATE"
577                            }
578                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME") => {
579                                "PARSE_DATETIME"
580                            }
581                            _ => "PARSE_TIMESTAMP",
582                        };
583                        let mut func_args = vec![strftime_fmt, c.this];
584                        if let Some(tz) = timezone {
585                            func_args.push(tz);
586                        }
587                        return Ok(Expression::Function(Box::new(Function::new(
588                            func_name.to_string(),
589                            func_args,
590                        ))));
591                    }
592                }
593                let transformed_type = match self.transform_data_type(c.to)? {
594                    Expression::DataType(dt) => dt,
595                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
596                };
597                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
598                    this: c.this,
599                    to: transformed_type,
600                    trailing_comments: c.trailing_comments,
601                    double_colon_syntax: c.double_colon_syntax,
602                    format: c.format,
603                    default: c.default,
604                    inferred_type: None,
605                })))
606            }
607
608            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
609            Expression::SafeCast(c) => {
610                let transformed_type = match self.transform_data_type(c.to)? {
611                    Expression::DataType(dt) => dt,
612                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
613                };
614                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
615                    this: c.this,
616                    to: transformed_type,
617                    trailing_comments: c.trailing_comments,
618                    double_colon_syntax: c.double_colon_syntax,
619                    format: c.format,
620                    default: c.default,
621                    inferred_type: None,
622                })))
623            }
624
625            // ===== SELECT-level transforms =====
626            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
627            Expression::Select(mut select) => {
628                if select.group_by.is_some() && select.order_by.is_some() {
629                    // Build map: expression → alias name for aliased projections
630                    let aliases: Vec<(Expression, Identifier)> = select
631                        .expressions
632                        .iter()
633                        .filter_map(|e| {
634                            if let Expression::Alias(a) = e {
635                                Some((a.this.clone(), a.alias.clone()))
636                            } else {
637                                None
638                            }
639                        })
640                        .collect();
641
642                    if let Some(ref mut group_by) = select.group_by {
643                        for grouped in group_by.expressions.iter_mut() {
644                            // Skip numeric indices (already aliased)
645                            if matches!(grouped, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
646                            {
647                                continue;
648                            }
649                            // Check if this GROUP BY expression matches a SELECT alias
650                            for (expr, alias_ident) in &aliases {
651                                if grouped == expr {
652                                    *grouped = Expression::boxed_column(Column {
653                                        name: alias_ident.clone(),
654                                        table: None,
655                                        join_mark: false,
656                                        trailing_comments: Vec::new(),
657                                        span: None,
658                                        inferred_type: None,
659                                    });
660                                    break;
661                                }
662                            }
663                        }
664                    }
665                }
666                Ok(Expression::Select(select))
667            }
668
669            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
670            Expression::ArrayContains(f) => {
671                let array_expr = f.this;
672                let value_expr = f.expression;
673
674                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
675                let unnest = Expression::Unnest(Box::new(UnnestFunc {
676                    this: array_expr,
677                    expressions: Vec::new(),
678                    with_ordinality: false,
679                    alias: None,
680                    offset_alias: None,
681                }));
682                let aliased_unnest = Expression::Alias(Box::new(Alias {
683                    this: unnest,
684                    alias: Identifier::new("_col"),
685                    column_aliases: Vec::new(),
686                    pre_alias_comments: Vec::new(),
687                    trailing_comments: Vec::new(),
688                    inferred_type: None,
689                }));
690                let col_ref = Expression::boxed_column(Column {
691                    name: Identifier::new("_col"),
692                    table: None,
693                    join_mark: false,
694                    trailing_comments: Vec::new(),
695                    span: None,
696                    inferred_type: None,
697                });
698                let where_clause = Where {
699                    this: Expression::Eq(Box::new(BinaryOp {
700                        left: col_ref,
701                        right: value_expr,
702                        left_comments: Vec::new(),
703                        operator_comments: Vec::new(),
704                        trailing_comments: Vec::new(),
705                        inferred_type: None,
706                    })),
707                };
708                let inner_select = Expression::Select(Box::new(Select {
709                    expressions: vec![Expression::Literal(Box::new(Literal::Number(
710                        "1".to_string(),
711                    )))],
712                    from: Some(From {
713                        expressions: vec![aliased_unnest],
714                    }),
715                    where_clause: Some(where_clause),
716                    ..Default::default()
717                }));
718                Ok(Expression::Exists(Box::new(Exists {
719                    this: inner_select,
720                    not: false,
721                })))
722            }
723
724            // ===== JSON_OBJECT array form → key-value pairs =====
725            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
726            Expression::JsonObject(mut f) => {
727                if f.pairs.len() == 1 {
728                    // Extract expressions from both Array and ArrayFunc variants
729                    let keys_exprs = match &f.pairs[0].0 {
730                        Expression::Array(arr) => Some(&arr.expressions),
731                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
732                        _ => None,
733                    };
734                    let vals_exprs = match &f.pairs[0].1 {
735                        Expression::Array(arr) => Some(&arr.expressions),
736                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
737                        _ => None,
738                    };
739                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
740                        if keys.len() == vals.len() {
741                            let new_pairs: Vec<(Expression, Expression)> = keys
742                                .iter()
743                                .zip(vals.iter())
744                                .map(|(k, v)| (k.clone(), v.clone()))
745                                .collect();
746                            f.pairs = new_pairs;
747                        }
748                    }
749                }
750                Ok(Expression::JsonObject(f))
751            }
752
753            // ===== MOD function: unwrap unnecessary Paren from first argument =====
754            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
755            Expression::ModFunc(mut f) => {
756                // Unwrap Paren from first argument if present
757                if let Expression::Paren(paren) = f.this {
758                    f.this = paren.this;
759                }
760                Ok(Expression::ModFunc(f))
761            }
762
763            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
764            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
765                let path = match *e.expression {
766                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
767                        let Literal::String(s) = lit.as_ref() else {
768                            unreachable!()
769                        };
770                        let normalized = if s.starts_with('$') {
771                            s.clone()
772                        } else if s.starts_with('[') {
773                            format!("${}", s)
774                        } else {
775                            format!("$.{}", s)
776                        };
777                        Expression::Literal(Box::new(Literal::String(normalized)))
778                    }
779                    other => other,
780                };
781                Ok(Expression::Function(Box::new(Function::new(
782                    "JSON_EXTRACT".to_string(),
783                    vec![*e.this, path],
784                ))))
785            }
786
787            // Generic function transformations
788            Expression::Function(f) => self.transform_function(*f),
789
790            // Generic aggregate function transformations
791            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
792
793            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
794            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
795            Expression::MethodCall(mc) => self.transform_method_call(*mc),
796
797            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
798            // and set is_table_function = true for TABLE FUNCTION syntax
799            Expression::CreateFunction(mut cf) => {
800                if let Some(ref mut rtb) = cf.returns_table_body {
801                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
802                        // Convert TABLE (...) to TABLE <...> with BigQuery types
803                        let inner = if rtb.starts_with("TABLE (") {
804                            &rtb["TABLE (".len()..rtb.len() - 1]
805                        } else {
806                            &rtb["TABLE(".len()..rtb.len() - 1]
807                        };
808                        // Convert common types to BigQuery equivalents
809                        let converted = inner
810                            .replace(" INT,", " INT64,")
811                            .replace(" INT)", " INT64)")
812                            .replace(" INTEGER,", " INT64,")
813                            .replace(" INTEGER)", " INT64)")
814                            .replace(" FLOAT,", " FLOAT64,")
815                            .replace(" FLOAT)", " FLOAT64)")
816                            .replace(" BOOLEAN,", " BOOL,")
817                            .replace(" BOOLEAN)", " BOOL)")
818                            .replace(" VARCHAR", " STRING")
819                            .replace(" TEXT", " STRING");
820                        // Handle trailing type (no comma, no paren)
821                        let converted = if converted.ends_with(" INT") {
822                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
823                        } else {
824                            converted
825                        };
826                        *rtb = format!("TABLE <{}>", converted);
827                        cf.is_table_function = true;
828                    }
829                }
830                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
831                if cf.is_table_function {
832                    if let Some(ref body) = cf.body {
833                        if matches!(body, FunctionBody::StringLiteral(_)) {
834                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
835                                // Parse the SQL string into an expression
836                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
837                                    if let Some(stmt) = parsed.into_iter().next() {
838                                        cf.body = Some(FunctionBody::Expression(stmt));
839                                    } else {
840                                        cf.body = Some(FunctionBody::StringLiteral(sql));
841                                    }
842                                } else {
843                                    cf.body = Some(FunctionBody::StringLiteral(sql));
844                                }
845                            }
846                        }
847                    }
848                }
849                Ok(Expression::CreateFunction(cf))
850            }
851
852            // Pass through everything else
853            _ => Ok(expr),
854        }
855    }
856}
857
858impl BigQueryDialect {
859    /// Transform data types according to BigQuery TYPE_MAPPING
860    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
861        use crate::expressions::DataType;
862        let transformed = match dt {
863            // BIGINT -> INT64
864            DataType::BigInt { .. } => DataType::Custom {
865                name: "INT64".to_string(),
866            },
867            // INT -> INT64
868            DataType::Int { .. } => DataType::Custom {
869                name: "INT64".to_string(),
870            },
871            // SMALLINT -> INT64
872            DataType::SmallInt { .. } => DataType::Custom {
873                name: "INT64".to_string(),
874            },
875            // TINYINT -> INT64
876            DataType::TinyInt { .. } => DataType::Custom {
877                name: "INT64".to_string(),
878            },
879            // FLOAT -> FLOAT64
880            DataType::Float { .. } => DataType::Custom {
881                name: "FLOAT64".to_string(),
882            },
883            // DOUBLE -> FLOAT64
884            DataType::Double { .. } => DataType::Custom {
885                name: "FLOAT64".to_string(),
886            },
887            // BOOLEAN -> BOOL
888            DataType::Boolean => DataType::Custom {
889                name: "BOOL".to_string(),
890            },
891            // CHAR -> STRING
892            DataType::Char { .. } => DataType::Custom {
893                name: "STRING".to_string(),
894            },
895            // VARCHAR -> STRING
896            DataType::VarChar { .. } => DataType::Custom {
897                name: "STRING".to_string(),
898            },
899            // TEXT -> STRING
900            DataType::Text => DataType::Custom {
901                name: "STRING".to_string(),
902            },
903            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
904            DataType::String { .. } => DataType::Custom {
905                name: "STRING".to_string(),
906            },
907            // BINARY -> BYTES
908            DataType::Binary { .. } => DataType::Custom {
909                name: "BYTES".to_string(),
910            },
911            // VARBINARY -> BYTES
912            DataType::VarBinary { .. } => DataType::Custom {
913                name: "BYTES".to_string(),
914            },
915            // BLOB -> BYTES
916            DataType::Blob => DataType::Custom {
917                name: "BYTES".to_string(),
918            },
919            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
920            DataType::Decimal { .. } => DataType::Custom {
921                name: "NUMERIC".to_string(),
922            },
923            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
924            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
925            // be converted to DATETIME by the generator
926            DataType::Timestamp {
927                timezone: false, ..
928            } => DataType::Custom {
929                name: "TIMESTAMP".to_string(),
930            },
931            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
932                name: "TIMESTAMP".to_string(),
933            },
934            // UUID -> STRING (BigQuery doesn't have native UUID type)
935            DataType::Uuid => DataType::Custom {
936                name: "STRING".to_string(),
937            },
938            // RECORD -> STRUCT in BigQuery
939            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
940                DataType::Custom {
941                    name: "STRUCT".to_string(),
942                }
943            }
944            // TIMESTAMPTZ (custom) -> TIMESTAMP
945            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
946                DataType::Custom {
947                    name: "TIMESTAMP".to_string(),
948                }
949            }
950            // BYTEINT (custom) -> INT64
951            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
952                DataType::Custom {
953                    name: "INT64".to_string(),
954                }
955            }
956            // Keep all other types as-is
957            other => other,
958        };
959        Ok(Expression::DataType(transformed))
960    }
961
962    fn transform_function(&self, f: Function) -> Result<Expression> {
963        let name_upper = f.name.to_uppercase();
964        match name_upper.as_str() {
965            // IFNULL -> COALESCE (both work in BigQuery)
966            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
967                original_name: None,
968                expressions: f.args,
969                inferred_type: None,
970            }))),
971
972            // NVL -> COALESCE
973            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
974                original_name: None,
975                expressions: f.args,
976                inferred_type: None,
977            }))),
978
979            // ISNULL -> COALESCE
980            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
981                original_name: None,
982                expressions: f.args,
983                inferred_type: None,
984            }))),
985
986            // GROUP_CONCAT -> STRING_AGG in BigQuery
987            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
988                Function::new("STRING_AGG".to_string(), f.args),
989            ))),
990
991            // SUBSTR -> SUBSTRING (both work)
992            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
993                "SUBSTRING".to_string(),
994                f.args,
995            )))),
996
997            // RANDOM -> RAND
998            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
999                seed: None,
1000                lower: None,
1001                upper: None,
1002            }))),
1003
1004            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
1005            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
1006            "CURRENT_DATE" if f.args.is_empty() => {
1007                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
1008            }
1009            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
1010                name: "CURRENT_DATE".to_string(),
1011                args: f.args,
1012                distinct: false,
1013                trailing_comments: Vec::new(),
1014                use_bracket_syntax: false,
1015                no_parens: false,
1016                quoted: false,
1017                span: None,
1018                inferred_type: None,
1019            }))),
1020
1021            // NOW -> CURRENT_TIMESTAMP in BigQuery
1022            "NOW" => Ok(Expression::CurrentTimestamp(
1023                crate::expressions::CurrentTimestamp {
1024                    precision: None,
1025                    sysdate: false,
1026                },
1027            )),
1028
1029            // TO_DATE -> PARSE_DATE in BigQuery
1030            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
1031                "PARSE_DATE".to_string(),
1032                f.args,
1033            )))),
1034
1035            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
1036            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1037                "PARSE_TIMESTAMP".to_string(),
1038                f.args,
1039            )))),
1040
1041            // TO_TIME -> TIME in BigQuery
1042            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1043                "TIME".to_string(),
1044                f.args,
1045            )))),
1046
1047            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
1048            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
1049                "FORMAT_DATE".to_string(),
1050                f.args,
1051            )))),
1052
1053            // POSITION -> STRPOS in BigQuery
1054            // BigQuery uses STRPOS(string, substring)
1055            "POSITION" if f.args.len() == 2 => {
1056                let mut args = f.args;
1057                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1058                let first = args.remove(0);
1059                let second = args.remove(0);
1060                Ok(Expression::Function(Box::new(Function::new(
1061                    "STRPOS".to_string(),
1062                    vec![second, first],
1063                ))))
1064            }
1065
1066            // LEN -> LENGTH
1067            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1068                f.args.into_iter().next().unwrap(),
1069            )))),
1070
1071            // CEILING -> CEIL (both work)
1072            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1073                this: f.args.into_iter().next().unwrap(),
1074                decimals: None,
1075                to: None,
1076            }))),
1077
1078            // GETDATE -> CURRENT_TIMESTAMP
1079            "GETDATE" => Ok(Expression::CurrentTimestamp(
1080                crate::expressions::CurrentTimestamp {
1081                    precision: None,
1082                    sysdate: false,
1083                },
1084            )),
1085
1086            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1087            // CARDINALITY -> ARRAY_LENGTH
1088            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1089                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1090            ))),
1091
1092            // UNNEST is native to BigQuery
1093
1094            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1095            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1096                "GENERATE_ARRAY".to_string(),
1097                f.args,
1098            )))),
1099
1100            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1101            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1102            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1103                "APPROX_COUNT_DISTINCT".to_string(),
1104                f.args,
1105            )))),
1106
1107            // COUNT_IF -> COUNTIF in BigQuery
1108            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1109                "COUNTIF".to_string(),
1110                f.args,
1111            )))),
1112
1113            // SHA1 -> SHA1 (native), SHA -> SHA1
1114            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1115                "SHA1".to_string(),
1116                f.args,
1117            )))),
1118
1119            // SHA256/SHA2 -> SHA256
1120            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1121                "SHA256".to_string(),
1122                f.args,
1123            )))),
1124
1125            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1126            // TO_HEX(MD5(x)) pattern
1127            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1128                "MD5".to_string(),
1129                f.args,
1130            )))),
1131
1132            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1133            // VAR_POP -> VAR_POP (native)
1134
1135            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1136            "DATEADD" if f.args.len() == 3 => {
1137                let mut args = f.args;
1138                let unit_expr = args.remove(0);
1139                let amount = args.remove(0);
1140                let date = args.remove(0);
1141                // Convert unit identifier to IntervalUnit
1142                let unit_name = match &unit_expr {
1143                    Expression::Identifier(id) => id.name.to_uppercase(),
1144                    Expression::Var(v) => v.this.to_uppercase(),
1145                    Expression::Column(col) if col.table.is_none() => {
1146                        col.name.name.to_uppercase()
1147                    }
1148                    _ => "DAY".to_string(),
1149                };
1150                let unit = match unit_name.as_str() {
1151                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1152                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1153                        crate::expressions::IntervalUnit::Quarter
1154                    }
1155                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1156                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1157                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1158                        crate::expressions::IntervalUnit::Day
1159                    }
1160                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1161                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1162                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1163                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1164                        crate::expressions::IntervalUnit::Millisecond
1165                    }
1166                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1167                        crate::expressions::IntervalUnit::Microsecond
1168                    }
1169                    _ => crate::expressions::IntervalUnit::Day,
1170                };
1171                Ok(Expression::DateAdd(Box::new(
1172                    crate::expressions::DateAddFunc {
1173                        this: date,
1174                        interval: amount,
1175                        unit,
1176                    },
1177                )))
1178            }
1179            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1180                "DATE_ADD".to_string(),
1181                f.args,
1182            )))),
1183
1184            // DATE_DIFF in BigQuery (native)
1185            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1186                "DATE_DIFF".to_string(),
1187                f.args,
1188            )))),
1189
1190            // TIMESTAMP_DIFF in BigQuery
1191            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1192                "TIMESTAMP_DIFF".to_string(),
1193                f.args,
1194            )))),
1195
1196            // TIME -> TIME (native)
1197            // DATETIME -> DATETIME (native)
1198
1199            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1200
1201            // NEWID/UUID -> GENERATE_UUID
1202            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1203                "GENERATE_UUID".to_string(),
1204                vec![],
1205            )))),
1206
1207            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1208            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1209                "EDIT_DISTANCE".to_string(),
1210                f.args,
1211            )))),
1212
1213            // UNIX_TIMESTAMP -> UNIX_SECONDS
1214            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1215                "UNIX_SECONDS".to_string(),
1216                f.args,
1217            )))),
1218
1219            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1220            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1221                "TIMESTAMP_SECONDS".to_string(),
1222                f.args,
1223            )))),
1224
1225            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1226            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1227                Function::new("LENGTH".to_string(), f.args),
1228            ))),
1229
1230            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1231            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1232                "BYTE_LENGTH".to_string(),
1233                f.args,
1234            )))),
1235
1236            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1237            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1238                "JSON_VALUE_ARRAY".to_string(),
1239                f.args,
1240            )))),
1241
1242            // INSTR is native to BigQuery
1243
1244            // SPLIT: BigQuery defaults to comma separator when none provided
1245            // SPLIT(foo) -> SPLIT(foo, ',')
1246            "SPLIT" if f.args.len() == 1 => {
1247                let mut args = f.args;
1248                args.push(Expression::Literal(Box::new(Literal::String(
1249                    ",".to_string(),
1250                ))));
1251                Ok(Expression::Split(Box::new(SplitFunc {
1252                    this: args.remove(0),
1253                    delimiter: args.remove(0),
1254                })))
1255            }
1256
1257            // SPLIT with two args - convert to Split expression
1258            "SPLIT" if f.args.len() == 2 => {
1259                let mut args = f.args;
1260                Ok(Expression::Split(Box::new(SplitFunc {
1261                    this: args.remove(0),
1262                    delimiter: args.remove(0),
1263                })))
1264            }
1265
1266            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1267            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1268                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1269                let args = if f.args.len() > 4 {
1270                    f.args[..4].to_vec()
1271                } else {
1272                    f.args
1273                };
1274                Ok(Expression::Function(Box::new(Function::new(
1275                    "REGEXP_EXTRACT".to_string(),
1276                    args,
1277                ))))
1278            }
1279            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1280                "REGEXP_EXTRACT".to_string(),
1281                f.args,
1282            )))),
1283
1284            // REGEXP_REPLACE - strip extra Snowflake-specific args
1285            "REGEXP_REPLACE" if f.args.len() > 3 => {
1286                let args = f.args[..3].to_vec();
1287                Ok(Expression::Function(Box::new(Function::new(
1288                    "REGEXP_REPLACE".to_string(),
1289                    args,
1290                ))))
1291            }
1292
1293            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1294            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1295                "JSON_OBJECT".to_string(),
1296                f.args,
1297            )))),
1298
1299            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1300            "EDITDISTANCE" if f.args.len() == 3 => {
1301                let col1 = f.args[0].clone();
1302                let col2 = f.args[1].clone();
1303                let max_dist = f.args[2].clone();
1304                Ok(Expression::Function(Box::new(Function::new(
1305                    "EDIT_DISTANCE".to_string(),
1306                    vec![
1307                        col1,
1308                        col2,
1309                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1310                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1311                            value: max_dist,
1312                            separator: crate::expressions::NamedArgSeparator::DArrow,
1313                        })),
1314                    ],
1315                ))))
1316            }
1317            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1318                Function::new("EDIT_DISTANCE".to_string(), f.args),
1319            ))),
1320
1321            // HEX_DECODE_BINARY -> FROM_HEX
1322            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1323                "FROM_HEX".to_string(),
1324                f.args,
1325            )))),
1326
1327            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1328            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1329            "PARSE_DATE"
1330            | "PARSE_DATETIME"
1331            | "PARSE_TIMESTAMP"
1332            | "SAFE.PARSE_DATE"
1333            | "SAFE.PARSE_DATETIME"
1334            | "SAFE.PARSE_TIMESTAMP" => {
1335                let args = self.normalize_time_format_args(f.args);
1336                Ok(Expression::Function(Box::new(Function {
1337                    name: f.name,
1338                    args,
1339                    distinct: f.distinct,
1340                    no_parens: f.no_parens,
1341                    trailing_comments: f.trailing_comments,
1342                    quoted: f.quoted,
1343                    use_bracket_syntax: f.use_bracket_syntax,
1344                    span: None,
1345                    inferred_type: None,
1346                })))
1347            }
1348
1349            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1350            "GET_PATH" if f.args.len() == 2 => {
1351                let mut args = f.args;
1352                let this = args.remove(0);
1353                let path = args.remove(0);
1354                let json_path = match &path {
1355                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1356                        let Literal::String(s) = lit.as_ref() else {
1357                            unreachable!()
1358                        };
1359                        let normalized = if s.starts_with('$') {
1360                            s.clone()
1361                        } else if s.starts_with('[') {
1362                            format!("${}", s)
1363                        } else {
1364                            format!("$.{}", s)
1365                        };
1366                        Expression::Literal(Box::new(Literal::String(normalized)))
1367                    }
1368                    _ => path,
1369                };
1370                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1371                    this,
1372                    path: json_path,
1373                    returning: None,
1374                    arrow_syntax: false,
1375                    hash_arrow_syntax: false,
1376                    wrapper_option: None,
1377                    quotes_option: None,
1378                    on_scalar_string: false,
1379                    on_error: None,
1380                })))
1381            }
1382
1383            // Pass through everything else
1384            _ => Ok(Expression::Function(Box::new(f))),
1385        }
1386    }
1387
1388    fn transform_aggregate_function(
1389        &self,
1390        f: Box<crate::expressions::AggregateFunction>,
1391    ) -> Result<Expression> {
1392        let name_upper = f.name.to_uppercase();
1393        match name_upper.as_str() {
1394            // GROUP_CONCAT -> STRING_AGG
1395            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1396                Function::new("STRING_AGG".to_string(), f.args),
1397            ))),
1398
1399            // Pass through everything else
1400            _ => Ok(Expression::AggregateFunction(f)),
1401        }
1402    }
1403
1404    /// Transform MethodCall expressions
1405    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1406    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1407        use crate::expressions::MethodCall;
1408
1409        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1410        if let Expression::Column(ref col) = mc.this {
1411            if col.name.name.eq_ignore_ascii_case("SAFE") {
1412                let method_upper = mc.method.name.to_uppercase();
1413                if method_upper == "PARSE_DATE"
1414                    || method_upper == "PARSE_DATETIME"
1415                    || method_upper == "PARSE_TIMESTAMP"
1416                {
1417                    // Normalize the format string in the first argument
1418                    let args = self.normalize_time_format_args(mc.args);
1419                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1420                        this: mc.this,
1421                        method: mc.method,
1422                        args,
1423                    })));
1424                }
1425            }
1426        }
1427
1428        // Pass through all other method calls
1429        Ok(Expression::MethodCall(Box::new(mc)))
1430    }
1431
1432    /// Normalize time format strings in function arguments
1433    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1434    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1435        args.into_iter()
1436            .enumerate()
1437            .map(|(i, arg)| {
1438                // Only transform the first argument (the format string)
1439                if i == 0 {
1440                    if let Expression::Literal(ref lit) = arg {
1441                        if let Literal::String(s) = lit.as_ref() {
1442                            let normalized = self.normalize_time_format(&s);
1443                            return Expression::Literal(Box::new(Literal::String(normalized)));
1444                        }
1445                    }
1446                }
1447                arg
1448            })
1449            .collect()
1450    }
1451
1452    /// Normalize a time format string according to BigQuery conventions
1453    /// %Y-%m-%d -> %F (ISO date)
1454    /// %H:%M:%S -> %T (time)
1455    fn normalize_time_format(&self, format: &str) -> String {
1456        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1457    }
1458
1459    /// Convert BigQuery CAST FORMAT elements to strftime equivalents,
1460    /// then normalize BigQuery shorthand forms (%Y-%m-%d -> %F, %H:%M:%S -> %T)
1461    fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
1462        use crate::expressions::Literal;
1463        if let Expression::Literal(lit) = format_expr {
1464            if let Literal::String(s) = lit.as_ref() {
1465                let result = s
1466                    .replace("YYYYMMDD", "%Y%m%d")
1467                    .replace("YYYY", "%Y")
1468                    .replace("YY", "%y")
1469                    .replace("MONTH", "%B")
1470                    .replace("MON", "%b")
1471                    .replace("MM", "%m")
1472                    .replace("DD", "%d")
1473                    .replace("HH24", "%H")
1474                    .replace("HH12", "%I")
1475                    .replace("HH", "%I")
1476                    .replace("MI", "%M")
1477                    .replace("SSTZH", "%S%z")
1478                    .replace("SS", "%S")
1479                    .replace("TZH", "%z");
1480                // Normalize: %Y-%m-%d -> %F, %H:%M:%S -> %T
1481                let normalized = result.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T");
1482                return Expression::Literal(Box::new(Literal::String(normalized)));
1483            }
1484        }
1485        format_expr.clone()
1486    }
1487}
1488
1489#[cfg(test)]
1490mod tests {
1491    use super::*;
1492    use crate::dialects::Dialect;
1493    use crate::parse_one;
1494
1495    fn transpile_to_bigquery(sql: &str) -> String {
1496        let dialect = Dialect::get(DialectType::Generic);
1497        let result = dialect
1498            .transpile_to(sql, DialectType::BigQuery)
1499            .expect("Transpile failed");
1500        result[0].clone()
1501    }
1502
1503    #[test]
1504    fn test_ifnull_identity() {
1505        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1506        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1507        assert!(
1508            result.contains("COALESCE"),
1509            "Expected COALESCE, got: {}",
1510            result
1511        );
1512    }
1513
1514    #[test]
1515    fn test_nvl_to_ifnull() {
1516        // NVL is converted to IFNULL in BigQuery
1517        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1518        assert!(
1519            result.contains("IFNULL"),
1520            "Expected IFNULL, got: {}",
1521            result
1522        );
1523    }
1524
1525    #[test]
1526    fn test_try_cast_to_safe_cast() {
1527        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1528        assert!(
1529            result.contains("SAFE_CAST"),
1530            "Expected SAFE_CAST, got: {}",
1531            result
1532        );
1533    }
1534
1535    #[test]
1536    fn test_random_to_rand() {
1537        let result = transpile_to_bigquery("SELECT RANDOM()");
1538        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1539    }
1540
1541    #[test]
1542    fn test_basic_select() {
1543        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1544        assert!(result.contains("SELECT"));
1545        assert!(result.contains("FROM users"));
1546    }
1547
1548    #[test]
1549    fn test_group_concat_to_string_agg() {
1550        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1551        assert!(
1552            result.contains("STRING_AGG"),
1553            "Expected STRING_AGG, got: {}",
1554            result
1555        );
1556    }
1557
1558    #[test]
1559    fn test_generate_series_to_generate_array() {
1560        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1561        assert!(
1562            result.contains("GENERATE_ARRAY"),
1563            "Expected GENERATE_ARRAY, got: {}",
1564            result
1565        );
1566    }
1567
1568    #[test]
1569    fn test_backtick_identifiers() {
1570        // BigQuery uses backticks for identifiers
1571        let dialect = BigQueryDialect;
1572        let config = dialect.generator_config();
1573        assert_eq!(config.identifier_quote, '`');
1574    }
1575
1576    fn bigquery_identity(sql: &str, expected: &str) {
1577        let dialect = Dialect::get(DialectType::BigQuery);
1578        let ast = dialect.parse(sql).expect("Parse failed");
1579        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1580        let result = dialect.generate(&transformed).expect("Generate failed");
1581        assert_eq!(result, expected, "SQL: {}", sql);
1582    }
1583
1584    #[test]
1585    fn test_cast_char_to_string() {
1586        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1587    }
1588
1589    #[test]
1590    fn test_cast_varchar_to_string() {
1591        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1592    }
1593
1594    #[test]
1595    fn test_cast_nchar_to_string() {
1596        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1597    }
1598
1599    #[test]
1600    fn test_cast_nvarchar_to_string() {
1601        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1602    }
1603
1604    #[test]
1605    fn test_cast_timestamptz_to_timestamp() {
1606        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1607    }
1608
1609    #[test]
1610    fn test_cast_record_to_struct() {
1611        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1612    }
1613
1614    #[test]
1615    fn test_json_literal_to_parse_json() {
1616        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1617        bigquery_identity(
1618            "SELECT JSON '\"foo\"' AS json_data",
1619            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1620        );
1621    }
1622
1623    #[test]
1624    fn test_grant_as_alias_not_quoted() {
1625        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1626        bigquery_identity(
1627            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1628            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1629        );
1630    }
1631
1632    #[test]
1633    fn test_timestamp_literal_to_cast() {
1634        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1635        bigquery_identity(
1636            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1637            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1638        );
1639    }
1640
1641    #[test]
1642    fn test_date_literal_to_cast_in_extract() {
1643        // Issue 1: DATE literal should become CAST syntax in BigQuery
1644        bigquery_identity(
1645            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1646            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1647        );
1648    }
1649
1650    #[test]
1651    fn test_json_object_with_json_literals() {
1652        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1653        bigquery_identity(
1654            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1655            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1656        );
1657    }
1658
1659    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1660    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1661    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1662
1663    #[test]
1664    fn test_safe_parse_date_format_normalization() {
1665        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1666        bigquery_identity(
1667            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1668            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1669        );
1670    }
1671
1672    #[test]
1673    fn test_safe_parse_datetime_format_normalization() {
1674        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1675        bigquery_identity(
1676            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1677            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1678        );
1679    }
1680
1681    #[test]
1682    fn test_safe_parse_timestamp_format_normalization() {
1683        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1684        bigquery_identity(
1685            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1686            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1687        );
1688    }
1689
1690    #[test]
1691    fn test_datetime_literal_to_cast() {
1692        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1693        bigquery_identity(
1694            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1695            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1696        );
1697    }
1698
1699    #[test]
1700    fn test_last_day_week_modifier_stripped() {
1701        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1702        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1703    }
1704
1705    #[test]
1706    fn test_hash_line_comment_parses() {
1707        // Regression test for issue #38:
1708        // BigQuery should accept # as a single-line comment.
1709        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1710        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1711    }
1712}