Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        // BigQuery supports # as single-line comments.
52        config.hash_comments = true;
53        config
54    }
55
56    fn generator_config(&self) -> GeneratorConfig {
57        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
58        GeneratorConfig {
59            identifier_quote: '`',
60            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
61            dialect: Some(DialectType::BigQuery),
62            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
63            normalize_functions: NormalizeFunctions::None,
64            // BigQuery-specific settings from Python sqlglot
65            interval_allows_plural_form: false,
66            join_hints: false,
67            query_hints: false,
68            table_hints: false,
69            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
70            rename_table_with_db: false,
71            nvl2_supported: false,
72            unnest_with_ordinality: false,
73            collate_is_func: true,
74            limit_only_literals: true,
75            supports_table_alias_columns: false,
76            unpivot_aliases_are_identifiers: false,
77            json_key_value_pair_sep: ",",
78            null_ordering_supported: false,
79            ignore_nulls_in_func: true,
80            json_path_single_quote_escape: true,
81            can_implement_array_any: true,
82            supports_to_number: false,
83            named_placeholder_token: "@",
84            hex_func: "TO_HEX",
85            with_properties_prefix: "OPTIONS",
86            supports_exploding_projections: false,
87            except_intersect_support_all_clause: false,
88            supports_unix_seconds: true,
89            // BigQuery uses SAFE_ prefix for safe operations
90            try_supported: true,
91            // BigQuery does not support SEMI/ANTI JOIN syntax
92            semi_anti_join_with_side: false,
93            ..Default::default()
94        }
95    }
96
97    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
98        match expr {
99            // ===== Data Type Mappings =====
100            Expression::DataType(dt) => self.transform_data_type(dt),
101
102            // ===== Null handling =====
103            // IFNULL is native to BigQuery - keep as-is for identity
104            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
105
106            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
107            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
108
109            // Coalesce stays as Coalesce
110            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
111
112            // ===== String aggregation =====
113            // GROUP_CONCAT -> STRING_AGG in BigQuery
114            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
115                this: f.this,
116                separator: f.separator,
117                order_by: f.order_by,
118                distinct: f.distinct,
119                filter: f.filter,
120                limit: None,
121            }))),
122
123            // ===== Cast operations =====
124            // Cast data types are transformed by transform_recursive in mod.rs
125            // which calls transform_data_type via transform_expr(Expression::DataType(...))
126
127            // TryCast -> SafeCast in BigQuery with type transformation
128            Expression::TryCast(c) => {
129                let transformed_type = match self.transform_data_type(c.to)? {
130                    Expression::DataType(dt) => dt,
131                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
132                };
133                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
134                    this: c.this,
135                    to: transformed_type,
136                    trailing_comments: c.trailing_comments,
137                    double_colon_syntax: c.double_colon_syntax,
138                    format: c.format,
139                    default: c.default,
140                })))
141            }
142
143            // ===== Pattern matching =====
144            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
145            Expression::ILike(op) => {
146                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
147                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
148                Ok(Expression::Like(Box::new(LikeOp {
149                    left: lower_left,
150                    right: lower_right,
151                    escape: op.escape,
152                    quantifier: op.quantifier,
153                })))
154            }
155
156            // RegexpLike -> REGEXP_CONTAINS in BigQuery
157            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
158                "REGEXP_CONTAINS".to_string(),
159                vec![f.this, f.pattern],
160            )))),
161
162            // ===== Array operations =====
163            // EXPLODE -> UNNEST in BigQuery
164            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
165                crate::expressions::UnnestFunc {
166                    this: f.this,
167                    expressions: Vec::new(),
168                    with_ordinality: false,
169                    alias: None,
170                    offset_alias: None,
171                },
172            ))),
173
174            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
175            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
176                crate::expressions::UnnestFunc {
177                    this: f.this,
178                    expressions: Vec::new(),
179                    with_ordinality: false,
180                    alias: None,
181                    offset_alias: None,
182                },
183            ))),
184
185            // GenerateSeries -> GENERATE_ARRAY in BigQuery
186            Expression::GenerateSeries(f) => {
187                let mut args = Vec::new();
188                if let Some(start) = f.start {
189                    args.push(*start);
190                }
191                if let Some(end) = f.end {
192                    args.push(*end);
193                }
194                if let Some(step) = f.step {
195                    args.push(*step);
196                }
197                Ok(Expression::Function(Box::new(Function::new(
198                    "GENERATE_ARRAY".to_string(),
199                    args,
200                ))))
201            }
202
203            // ===== Bitwise operations =====
204            // BitwiseAndAgg -> BIT_AND
205            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
206                "BIT_AND".to_string(),
207                vec![f.this],
208            )))),
209
210            // BitwiseOrAgg -> BIT_OR
211            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
212                "BIT_OR".to_string(),
213                vec![f.this],
214            )))),
215
216            // BitwiseXorAgg -> BIT_XOR
217            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
218                "BIT_XOR".to_string(),
219                vec![f.this],
220            )))),
221
222            // BitwiseCount -> BIT_COUNT
223            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
224                "BIT_COUNT".to_string(),
225                vec![f.this],
226            )))),
227
228            // ByteLength -> BYTE_LENGTH
229            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
230                "BYTE_LENGTH".to_string(),
231                vec![f.this],
232            )))),
233
234            // IntDiv -> DIV
235            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
236                "DIV".to_string(),
237                vec![f.this, f.expression],
238            )))),
239
240            // Int64 -> INT64
241            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
242                "INT64".to_string(),
243                vec![f.this],
244            )))),
245
246            // ===== Random =====
247            // RANDOM -> RAND in BigQuery
248            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
249                seed: None,
250                lower: None,
251                upper: None,
252            }))),
253
254            // ===== UUID =====
255            // Uuid -> GENERATE_UUID in BigQuery
256            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
257                "GENERATE_UUID".to_string(),
258                vec![],
259            )))),
260
261            // ===== Approximate functions =====
262            // ApproxDistinct -> APPROX_COUNT_DISTINCT
263            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
264                "APPROX_COUNT_DISTINCT".to_string(),
265                vec![f.this],
266            )))),
267
268            // ArgMax -> MAX_BY in BigQuery
269            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
270                "MAX_BY".to_string(),
271                vec![*f.this, *f.expression],
272            )))),
273
274            // ArgMin -> MIN_BY in BigQuery
275            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
276                "MIN_BY".to_string(),
277                vec![*f.this, *f.expression],
278            )))),
279
280            // ===== Conditional =====
281            // CountIf -> COUNTIF in BigQuery
282            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
283                "COUNTIF".to_string(),
284                vec![f.this],
285            )))),
286
287            // ===== String functions =====
288            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
289            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
290
291            // ===== Conversion =====
292            // Unhex -> FROM_HEX
293            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
294                "FROM_HEX".to_string(),
295                vec![*f.this],
296            )))),
297
298            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
299            Expression::UnixToTime(f) => {
300                let scale = f.scale.unwrap_or(0);
301                match scale {
302                    0 => Ok(Expression::Function(Box::new(Function::new(
303                        "TIMESTAMP_SECONDS".to_string(),
304                        vec![*f.this],
305                    )))),
306                    3 => Ok(Expression::Function(Box::new(Function::new(
307                        "TIMESTAMP_MILLIS".to_string(),
308                        vec![*f.this],
309                    )))),
310                    6 => Ok(Expression::Function(Box::new(Function::new(
311                        "TIMESTAMP_MICROS".to_string(),
312                        vec![*f.this],
313                    )))),
314                    _ => {
315                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
316                        let div_expr =
317                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
318                                *f.this,
319                                Expression::Function(Box::new(Function::new(
320                                    "POWER".to_string(),
321                                    vec![Expression::number(10), Expression::number(scale)],
322                                ))),
323                            )));
324                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
325                            this: div_expr,
326                            to: crate::expressions::DataType::Custom {
327                                name: "INT64".to_string(),
328                            },
329                            double_colon_syntax: false,
330                            trailing_comments: vec![],
331                            format: None,
332                            default: None,
333                        }));
334                        Ok(Expression::Function(Box::new(Function::new(
335                            "TIMESTAMP_SECONDS".to_string(),
336                            vec![cast_expr],
337                        ))))
338                    }
339                }
340            }
341
342            // ===== Date/time =====
343            // DateDiff -> DATE_DIFF in BigQuery
344            Expression::DateDiff(f) => {
345                // BigQuery: DATE_DIFF(date1, date2, part)
346                let unit_str = match f.unit {
347                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
348                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
349                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
350                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
351                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
352                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
353                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
354                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
355                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
356                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
357                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
358                    None => "DAY",
359                };
360                let unit = Expression::Identifier(crate::expressions::Identifier {
361                    name: unit_str.to_string(),
362                    quoted: false,
363                    trailing_comments: Vec::new(),
364                    span: None,
365                });
366                Ok(Expression::Function(Box::new(Function::new(
367                    "DATE_DIFF".to_string(),
368                    vec![f.this, f.expression, unit],
369                ))))
370            }
371
372            // ===== Variance =====
373            // VarPop -> VAR_POP
374            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
375                "VAR_POP".to_string(),
376                vec![f.this],
377            )))),
378
379            // ===== Hash functions =====
380            // SHA -> SHA1
381            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
382                "SHA1".to_string(),
383                vec![f.this],
384            )))),
385
386            // SHA1Digest -> SHA1
387            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
388                "SHA1".to_string(),
389                vec![f.this],
390            )))),
391
392            // MD5Digest -> MD5
393            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
394                "MD5".to_string(),
395                vec![*f.this],
396            )))),
397
398            // ===== Type conversion =====
399            // JSONBool -> BOOL
400            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
401                "BOOL".to_string(),
402                vec![f.this],
403            )))),
404
405            // StringFunc -> STRING
406            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
407                "STRING".to_string(),
408                vec![*f.this],
409            )))),
410
411            // ===== Date/time from parts =====
412            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
413            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
414                "DATE_FROM_UNIX_DATE".to_string(),
415                vec![f.this],
416            )))),
417
418            // UnixDate -> UNIX_DATE
419            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
420                "UNIX_DATE".to_string(),
421                vec![f.this],
422            )))),
423
424            // TimestampDiff -> TIMESTAMP_DIFF
425            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
426                "TIMESTAMP_DIFF".to_string(),
427                vec![*f.this, *f.expression],
428            )))),
429
430            // FromTimeZone -> DATETIME
431            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
432                "DATETIME".to_string(),
433                vec![*f.this],
434            )))),
435
436            // TsOrDsToDatetime -> DATETIME
437            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
438                "DATETIME".to_string(),
439                vec![f.this],
440            )))),
441
442            // TsOrDsToTimestamp -> TIMESTAMP
443            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
444                "TIMESTAMP".to_string(),
445                vec![f.this],
446            )))),
447
448            // ===== IfFunc -> IF in BigQuery =====
449            Expression::IfFunc(f) => {
450                let mut args = vec![f.condition, f.true_value];
451                if let Some(false_val) = f.false_value {
452                    args.push(false_val);
453                } else {
454                    args.push(Expression::Null(crate::expressions::Null));
455                }
456                Ok(Expression::Function(Box::new(Function::new(
457                    "IF".to_string(),
458                    args,
459                ))))
460            }
461
462            // ===== HexString -> FROM_HEX =====
463            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
464                "FROM_HEX".to_string(),
465                vec![*f.this],
466            )))),
467
468            // ===== Additional auto-generated transforms from Python sqlglot =====
469            // ApproxTopK -> APPROX_TOP_COUNT
470            Expression::ApproxTopK(f) => {
471                let mut args = vec![*f.this];
472                if let Some(expr) = f.expression {
473                    args.push(*expr);
474                }
475                Ok(Expression::Function(Box::new(Function::new(
476                    "APPROX_TOP_COUNT".to_string(),
477                    args,
478                ))))
479            }
480
481            // SafeDivide -> SAFE_DIVIDE
482            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
483                "SAFE_DIVIDE".to_string(),
484                vec![*f.this, *f.expression],
485            )))),
486
487            // JSONKeysAtDepth -> JSON_KEYS
488            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
489                "JSON_KEYS".to_string(),
490                vec![*f.this],
491            )))),
492
493            // JSONValueArray -> JSON_VALUE_ARRAY
494            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
495                "JSON_VALUE_ARRAY".to_string(),
496                vec![*f.this],
497            )))),
498
499            // DateFromParts -> DATE
500            Expression::DateFromParts(f) => {
501                let mut args = Vec::new();
502                if let Some(y) = f.year {
503                    args.push(*y);
504                }
505                if let Some(m) = f.month {
506                    args.push(*m);
507                }
508                if let Some(d) = f.day {
509                    args.push(*d);
510                }
511                Ok(Expression::Function(Box::new(Function::new(
512                    "DATE".to_string(),
513                    args,
514                ))))
515            }
516
517            // SPLIT: BigQuery defaults to comma separator when none provided
518            // SPLIT(foo) -> SPLIT(foo, ',')
519            Expression::Split(f) => {
520                // Check if delimiter is empty or a placeholder - add default comma
521                let delimiter = match &f.delimiter {
522                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
523                        Expression::Literal(Literal::String(",".to_string()))
524                    }
525                    _ => f.delimiter,
526                };
527                Ok(Expression::Split(Box::new(SplitFunc {
528                    this: f.this,
529                    delimiter,
530                })))
531            }
532
533            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
534            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
535            Expression::Cast(c) => {
536                use crate::expressions::DataType;
537                // Check if casting to JSON - use PARSE_JSON instead
538                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
539                // (parser creates Custom for type literals like JSON 'string')
540                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
541                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
542                if is_json {
543                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
544                }
545                let transformed_type = match self.transform_data_type(c.to)? {
546                    Expression::DataType(dt) => dt,
547                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
548                };
549                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
550                    this: c.this,
551                    to: transformed_type,
552                    trailing_comments: c.trailing_comments,
553                    double_colon_syntax: c.double_colon_syntax,
554                    format: c.format,
555                    default: c.default,
556                })))
557            }
558
559            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
560            Expression::SafeCast(c) => {
561                let transformed_type = match self.transform_data_type(c.to)? {
562                    Expression::DataType(dt) => dt,
563                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
564                };
565                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
566                    this: c.this,
567                    to: transformed_type,
568                    trailing_comments: c.trailing_comments,
569                    double_colon_syntax: c.double_colon_syntax,
570                    format: c.format,
571                    default: c.default,
572                })))
573            }
574
575            // ===== SELECT-level transforms =====
576            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
577            Expression::Select(mut select) => {
578                if select.group_by.is_some() && select.order_by.is_some() {
579                    // Build map: expression → alias name for aliased projections
580                    let aliases: Vec<(Expression, Identifier)> = select
581                        .expressions
582                        .iter()
583                        .filter_map(|e| {
584                            if let Expression::Alias(a) = e {
585                                Some((a.this.clone(), a.alias.clone()))
586                            } else {
587                                None
588                            }
589                        })
590                        .collect();
591
592                    if let Some(ref mut group_by) = select.group_by {
593                        for grouped in group_by.expressions.iter_mut() {
594                            // Skip numeric indices (already aliased)
595                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
596                                continue;
597                            }
598                            // Check if this GROUP BY expression matches a SELECT alias
599                            for (expr, alias_ident) in &aliases {
600                                if grouped == expr {
601                                    *grouped = Expression::Column(Column {
602                                        name: alias_ident.clone(),
603                                        table: None,
604                                        join_mark: false,
605                                        trailing_comments: Vec::new(),
606                                        span: None,
607                                    });
608                                    break;
609                                }
610                            }
611                        }
612                    }
613                }
614                Ok(Expression::Select(select))
615            }
616
617            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
618            Expression::ArrayContains(f) => {
619                let array_expr = f.this;
620                let value_expr = f.expression;
621
622                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
623                let unnest = Expression::Unnest(Box::new(UnnestFunc {
624                    this: array_expr,
625                    expressions: Vec::new(),
626                    with_ordinality: false,
627                    alias: None,
628                    offset_alias: None,
629                }));
630                let aliased_unnest = Expression::Alias(Box::new(Alias {
631                    this: unnest,
632                    alias: Identifier::new("_col"),
633                    column_aliases: Vec::new(),
634                    pre_alias_comments: Vec::new(),
635                    trailing_comments: Vec::new(),
636                }));
637                let col_ref = Expression::Column(Column {
638                    name: Identifier::new("_col"),
639                    table: None,
640                    join_mark: false,
641                    trailing_comments: Vec::new(),
642                    span: None,
643                });
644                let where_clause = Where {
645                    this: Expression::Eq(Box::new(BinaryOp {
646                        left: col_ref,
647                        right: value_expr,
648                        left_comments: Vec::new(),
649                        operator_comments: Vec::new(),
650                        trailing_comments: Vec::new(),
651                    })),
652                };
653                let inner_select = Expression::Select(Box::new(Select {
654                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
655                    from: Some(From {
656                        expressions: vec![aliased_unnest],
657                    }),
658                    where_clause: Some(where_clause),
659                    ..Default::default()
660                }));
661                Ok(Expression::Exists(Box::new(Exists {
662                    this: inner_select,
663                    not: false,
664                })))
665            }
666
667            // ===== JSON_OBJECT array form → key-value pairs =====
668            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
669            Expression::JsonObject(mut f) => {
670                if f.pairs.len() == 1 {
671                    // Extract expressions from both Array and ArrayFunc variants
672                    let keys_exprs = match &f.pairs[0].0 {
673                        Expression::Array(arr) => Some(&arr.expressions),
674                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
675                        _ => None,
676                    };
677                    let vals_exprs = match &f.pairs[0].1 {
678                        Expression::Array(arr) => Some(&arr.expressions),
679                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
680                        _ => None,
681                    };
682                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
683                        if keys.len() == vals.len() {
684                            let new_pairs: Vec<(Expression, Expression)> = keys
685                                .iter()
686                                .zip(vals.iter())
687                                .map(|(k, v)| (k.clone(), v.clone()))
688                                .collect();
689                            f.pairs = new_pairs;
690                        }
691                    }
692                }
693                Ok(Expression::JsonObject(f))
694            }
695
696            // ===== MOD function: unwrap unnecessary Paren from first argument =====
697            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
698            Expression::ModFunc(mut f) => {
699                // Unwrap Paren from first argument if present
700                if let Expression::Paren(paren) = f.this {
701                    f.this = paren.this;
702                }
703                Ok(Expression::ModFunc(f))
704            }
705
706            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
707            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
708                let path = match *e.expression {
709                    Expression::Literal(Literal::String(s)) => {
710                        let normalized = if s.starts_with('$') {
711                            s
712                        } else if s.starts_with('[') {
713                            format!("${}", s)
714                        } else {
715                            format!("$.{}", s)
716                        };
717                        Expression::Literal(Literal::String(normalized))
718                    }
719                    other => other,
720                };
721                Ok(Expression::Function(Box::new(Function::new(
722                    "JSON_EXTRACT".to_string(),
723                    vec![*e.this, path],
724                ))))
725            }
726
727            // Generic function transformations
728            Expression::Function(f) => self.transform_function(*f),
729
730            // Generic aggregate function transformations
731            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
732
733            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
734            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
735            Expression::MethodCall(mc) => self.transform_method_call(*mc),
736
737            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
738            // and set is_table_function = true for TABLE FUNCTION syntax
739            Expression::CreateFunction(mut cf) => {
740                if let Some(ref mut rtb) = cf.returns_table_body {
741                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
742                        // Convert TABLE (...) to TABLE <...> with BigQuery types
743                        let inner = if rtb.starts_with("TABLE (") {
744                            &rtb["TABLE (".len()..rtb.len() - 1]
745                        } else {
746                            &rtb["TABLE(".len()..rtb.len() - 1]
747                        };
748                        // Convert common types to BigQuery equivalents
749                        let converted = inner
750                            .replace(" INT,", " INT64,")
751                            .replace(" INT)", " INT64)")
752                            .replace(" INTEGER,", " INT64,")
753                            .replace(" INTEGER)", " INT64)")
754                            .replace(" FLOAT,", " FLOAT64,")
755                            .replace(" FLOAT)", " FLOAT64)")
756                            .replace(" BOOLEAN,", " BOOL,")
757                            .replace(" BOOLEAN)", " BOOL)")
758                            .replace(" VARCHAR", " STRING")
759                            .replace(" TEXT", " STRING");
760                        // Handle trailing type (no comma, no paren)
761                        let converted = if converted.ends_with(" INT") {
762                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
763                        } else {
764                            converted
765                        };
766                        *rtb = format!("TABLE <{}>", converted);
767                        cf.is_table_function = true;
768                    }
769                }
770                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
771                if cf.is_table_function {
772                    if let Some(ref body) = cf.body {
773                        if matches!(body, FunctionBody::StringLiteral(_)) {
774                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
775                                // Parse the SQL string into an expression
776                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
777                                    if let Some(stmt) = parsed.into_iter().next() {
778                                        cf.body = Some(FunctionBody::Expression(stmt));
779                                    } else {
780                                        cf.body = Some(FunctionBody::StringLiteral(sql));
781                                    }
782                                } else {
783                                    cf.body = Some(FunctionBody::StringLiteral(sql));
784                                }
785                            }
786                        }
787                    }
788                }
789                Ok(Expression::CreateFunction(cf))
790            }
791
792            // Pass through everything else
793            _ => Ok(expr),
794        }
795    }
796}
797
798impl BigQueryDialect {
799    /// Transform data types according to BigQuery TYPE_MAPPING
800    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
801        use crate::expressions::DataType;
802        let transformed = match dt {
803            // BIGINT -> INT64
804            DataType::BigInt { .. } => DataType::Custom {
805                name: "INT64".to_string(),
806            },
807            // INT -> INT64
808            DataType::Int { .. } => DataType::Custom {
809                name: "INT64".to_string(),
810            },
811            // SMALLINT -> INT64
812            DataType::SmallInt { .. } => DataType::Custom {
813                name: "INT64".to_string(),
814            },
815            // TINYINT -> INT64
816            DataType::TinyInt { .. } => DataType::Custom {
817                name: "INT64".to_string(),
818            },
819            // FLOAT -> FLOAT64
820            DataType::Float { .. } => DataType::Custom {
821                name: "FLOAT64".to_string(),
822            },
823            // DOUBLE -> FLOAT64
824            DataType::Double { .. } => DataType::Custom {
825                name: "FLOAT64".to_string(),
826            },
827            // BOOLEAN -> BOOL
828            DataType::Boolean => DataType::Custom {
829                name: "BOOL".to_string(),
830            },
831            // CHAR -> STRING
832            DataType::Char { .. } => DataType::Custom {
833                name: "STRING".to_string(),
834            },
835            // VARCHAR -> STRING
836            DataType::VarChar { .. } => DataType::Custom {
837                name: "STRING".to_string(),
838            },
839            // TEXT -> STRING
840            DataType::Text => DataType::Custom {
841                name: "STRING".to_string(),
842            },
843            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
844            DataType::String { .. } => DataType::Custom {
845                name: "STRING".to_string(),
846            },
847            // BINARY -> BYTES
848            DataType::Binary { .. } => DataType::Custom {
849                name: "BYTES".to_string(),
850            },
851            // VARBINARY -> BYTES
852            DataType::VarBinary { .. } => DataType::Custom {
853                name: "BYTES".to_string(),
854            },
855            // BLOB -> BYTES
856            DataType::Blob => DataType::Custom {
857                name: "BYTES".to_string(),
858            },
859            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
860            DataType::Decimal { .. } => DataType::Custom {
861                name: "NUMERIC".to_string(),
862            },
863            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
864            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
865            // be converted to DATETIME by the generator
866            DataType::Timestamp {
867                timezone: false, ..
868            } => DataType::Custom {
869                name: "TIMESTAMP".to_string(),
870            },
871            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
872                name: "TIMESTAMP".to_string(),
873            },
874            // UUID -> STRING (BigQuery doesn't have native UUID type)
875            DataType::Uuid => DataType::Custom {
876                name: "STRING".to_string(),
877            },
878            // RECORD -> STRUCT in BigQuery
879            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
880                DataType::Custom {
881                    name: "STRUCT".to_string(),
882                }
883            }
884            // TIMESTAMPTZ (custom) -> TIMESTAMP
885            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
886                DataType::Custom {
887                    name: "TIMESTAMP".to_string(),
888                }
889            }
890            // BYTEINT (custom) -> INT64
891            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
892                DataType::Custom {
893                    name: "INT64".to_string(),
894                }
895            }
896            // Keep all other types as-is
897            other => other,
898        };
899        Ok(Expression::DataType(transformed))
900    }
901
902    fn transform_function(&self, f: Function) -> Result<Expression> {
903        let name_upper = f.name.to_uppercase();
904        match name_upper.as_str() {
905            // IFNULL -> COALESCE (both work in BigQuery)
906            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
907                original_name: None,
908                expressions: f.args,
909            }))),
910
911            // NVL -> COALESCE
912            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
913                original_name: None,
914                expressions: f.args,
915            }))),
916
917            // ISNULL -> COALESCE
918            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
919                original_name: None,
920                expressions: f.args,
921            }))),
922
923            // GROUP_CONCAT -> STRING_AGG in BigQuery
924            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
925                Function::new("STRING_AGG".to_string(), f.args),
926            ))),
927
928            // SUBSTR -> SUBSTRING (both work)
929            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
930                "SUBSTRING".to_string(),
931                f.args,
932            )))),
933
934            // RANDOM -> RAND
935            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
936                seed: None,
937                lower: None,
938                upper: None,
939            }))),
940
941            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
942            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
943            "CURRENT_DATE" if f.args.is_empty() => {
944                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
945            }
946            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
947                name: "CURRENT_DATE".to_string(),
948                args: f.args,
949                distinct: false,
950                trailing_comments: Vec::new(),
951                use_bracket_syntax: false,
952                no_parens: false,
953                quoted: false,
954                span: None,
955            }))),
956
957            // NOW -> CURRENT_TIMESTAMP in BigQuery
958            "NOW" => Ok(Expression::CurrentTimestamp(
959                crate::expressions::CurrentTimestamp {
960                    precision: None,
961                    sysdate: false,
962                },
963            )),
964
965            // TO_DATE -> PARSE_DATE in BigQuery
966            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
967                "PARSE_DATE".to_string(),
968                f.args,
969            )))),
970
971            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
972            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
973                "PARSE_TIMESTAMP".to_string(),
974                f.args,
975            )))),
976
977            // TO_TIME -> TIME in BigQuery
978            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
979                "TIME".to_string(),
980                f.args,
981            )))),
982
983            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
984            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
985                "FORMAT_DATE".to_string(),
986                f.args,
987            )))),
988
989            // POSITION -> STRPOS in BigQuery
990            // BigQuery uses STRPOS(string, substring)
991            "POSITION" if f.args.len() == 2 => {
992                let mut args = f.args;
993                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
994                let first = args.remove(0);
995                let second = args.remove(0);
996                Ok(Expression::Function(Box::new(Function::new(
997                    "STRPOS".to_string(),
998                    vec![second, first],
999                ))))
1000            }
1001
1002            // LEN -> LENGTH
1003            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1004                f.args.into_iter().next().unwrap(),
1005            )))),
1006
1007            // CEILING -> CEIL (both work)
1008            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1009                this: f.args.into_iter().next().unwrap(),
1010                decimals: None,
1011                to: None,
1012            }))),
1013
1014            // GETDATE -> CURRENT_TIMESTAMP
1015            "GETDATE" => Ok(Expression::CurrentTimestamp(
1016                crate::expressions::CurrentTimestamp {
1017                    precision: None,
1018                    sysdate: false,
1019                },
1020            )),
1021
1022            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1023            // CARDINALITY -> ARRAY_LENGTH
1024            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1025                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1026            ))),
1027
1028            // UNNEST is native to BigQuery
1029
1030            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1031            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1032                "GENERATE_ARRAY".to_string(),
1033                f.args,
1034            )))),
1035
1036            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1037            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1038            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1039                "APPROX_COUNT_DISTINCT".to_string(),
1040                f.args,
1041            )))),
1042
1043            // COUNT_IF -> COUNTIF in BigQuery
1044            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1045                "COUNTIF".to_string(),
1046                f.args,
1047            )))),
1048
1049            // SHA1 -> SHA1 (native), SHA -> SHA1
1050            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1051                "SHA1".to_string(),
1052                f.args,
1053            )))),
1054
1055            // SHA256/SHA2 -> SHA256
1056            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1057                "SHA256".to_string(),
1058                f.args,
1059            )))),
1060
1061            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1062            // TO_HEX(MD5(x)) pattern
1063            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1064                "MD5".to_string(),
1065                f.args,
1066            )))),
1067
1068            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1069            // VAR_POP -> VAR_POP (native)
1070
1071            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1072            "DATEADD" if f.args.len() == 3 => {
1073                let mut args = f.args;
1074                let unit_expr = args.remove(0);
1075                let amount = args.remove(0);
1076                let date = args.remove(0);
1077                // Convert unit identifier to IntervalUnit
1078                let unit_name = match &unit_expr {
1079                    Expression::Identifier(id) => id.name.to_uppercase(),
1080                    _ => "DAY".to_string(),
1081                };
1082                let unit = match unit_name.as_str() {
1083                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1084                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1085                        crate::expressions::IntervalUnit::Quarter
1086                    }
1087                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1088                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1089                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1090                        crate::expressions::IntervalUnit::Day
1091                    }
1092                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1093                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1094                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1095                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1096                        crate::expressions::IntervalUnit::Millisecond
1097                    }
1098                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1099                        crate::expressions::IntervalUnit::Microsecond
1100                    }
1101                    _ => crate::expressions::IntervalUnit::Day,
1102                };
1103                Ok(Expression::DateAdd(Box::new(
1104                    crate::expressions::DateAddFunc {
1105                        this: date,
1106                        interval: amount,
1107                        unit,
1108                    },
1109                )))
1110            }
1111            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1112                "DATE_ADD".to_string(),
1113                f.args,
1114            )))),
1115
1116            // DATE_DIFF in BigQuery (native)
1117            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1118                "DATE_DIFF".to_string(),
1119                f.args,
1120            )))),
1121
1122            // TIMESTAMP_DIFF in BigQuery
1123            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1124                "TIMESTAMP_DIFF".to_string(),
1125                f.args,
1126            )))),
1127
1128            // TIME -> TIME (native)
1129            // DATETIME -> DATETIME (native)
1130
1131            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1132
1133            // NEWID/UUID -> GENERATE_UUID
1134            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1135                "GENERATE_UUID".to_string(),
1136                vec![],
1137            )))),
1138
1139            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1140            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1141                "EDIT_DISTANCE".to_string(),
1142                f.args,
1143            )))),
1144
1145            // UNIX_TIMESTAMP -> UNIX_SECONDS
1146            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1147                "UNIX_SECONDS".to_string(),
1148                f.args,
1149            )))),
1150
1151            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1152            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1153                "TIMESTAMP_SECONDS".to_string(),
1154                f.args,
1155            )))),
1156
1157            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1158            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1159                Function::new("LENGTH".to_string(), f.args),
1160            ))),
1161
1162            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1163            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1164                "BYTE_LENGTH".to_string(),
1165                f.args,
1166            )))),
1167
1168            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1169            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1170                "JSON_VALUE_ARRAY".to_string(),
1171                f.args,
1172            )))),
1173
1174            // INSTR is native to BigQuery
1175
1176            // SPLIT: BigQuery defaults to comma separator when none provided
1177            // SPLIT(foo) -> SPLIT(foo, ',')
1178            "SPLIT" if f.args.len() == 1 => {
1179                let mut args = f.args;
1180                args.push(Expression::Literal(Literal::String(",".to_string())));
1181                Ok(Expression::Split(Box::new(SplitFunc {
1182                    this: args.remove(0),
1183                    delimiter: args.remove(0),
1184                })))
1185            }
1186
1187            // SPLIT with two args - convert to Split expression
1188            "SPLIT" if f.args.len() == 2 => {
1189                let mut args = f.args;
1190                Ok(Expression::Split(Box::new(SplitFunc {
1191                    this: args.remove(0),
1192                    delimiter: args.remove(0),
1193                })))
1194            }
1195
1196            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1197            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1198                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1199                let args = if f.args.len() > 4 {
1200                    f.args[..4].to_vec()
1201                } else {
1202                    f.args
1203                };
1204                Ok(Expression::Function(Box::new(Function::new(
1205                    "REGEXP_EXTRACT".to_string(),
1206                    args,
1207                ))))
1208            }
1209            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1210                "REGEXP_EXTRACT".to_string(),
1211                f.args,
1212            )))),
1213
1214            // REGEXP_REPLACE - strip extra Snowflake-specific args
1215            "REGEXP_REPLACE" if f.args.len() > 3 => {
1216                let args = f.args[..3].to_vec();
1217                Ok(Expression::Function(Box::new(Function::new(
1218                    "REGEXP_REPLACE".to_string(),
1219                    args,
1220                ))))
1221            }
1222
1223            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1224            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1225                "JSON_OBJECT".to_string(),
1226                f.args,
1227            )))),
1228
1229            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1230            "EDITDISTANCE" if f.args.len() == 3 => {
1231                let col1 = f.args[0].clone();
1232                let col2 = f.args[1].clone();
1233                let max_dist = f.args[2].clone();
1234                Ok(Expression::Function(Box::new(Function::new(
1235                    "EDIT_DISTANCE".to_string(),
1236                    vec![
1237                        col1,
1238                        col2,
1239                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1240                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1241                            value: max_dist,
1242                            separator: crate::expressions::NamedArgSeparator::DArrow,
1243                        })),
1244                    ],
1245                ))))
1246            }
1247            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1248                Function::new("EDIT_DISTANCE".to_string(), f.args),
1249            ))),
1250
1251            // HEX_DECODE_BINARY -> FROM_HEX
1252            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1253                "FROM_HEX".to_string(),
1254                f.args,
1255            )))),
1256
1257            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1258            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1259            "PARSE_DATE"
1260            | "PARSE_DATETIME"
1261            | "PARSE_TIMESTAMP"
1262            | "SAFE.PARSE_DATE"
1263            | "SAFE.PARSE_DATETIME"
1264            | "SAFE.PARSE_TIMESTAMP" => {
1265                let args = self.normalize_time_format_args(f.args);
1266                Ok(Expression::Function(Box::new(Function {
1267                    name: f.name,
1268                    args,
1269                    distinct: f.distinct,
1270                    no_parens: f.no_parens,
1271                    trailing_comments: f.trailing_comments,
1272                    quoted: f.quoted,
1273                    use_bracket_syntax: f.use_bracket_syntax,
1274                    span: None,
1275                })))
1276            }
1277
1278            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1279            "GET_PATH" if f.args.len() == 2 => {
1280                let mut args = f.args;
1281                let this = args.remove(0);
1282                let path = args.remove(0);
1283                let json_path = match &path {
1284                    Expression::Literal(Literal::String(s)) => {
1285                        let normalized = if s.starts_with('$') {
1286                            s.clone()
1287                        } else if s.starts_with('[') {
1288                            format!("${}", s)
1289                        } else {
1290                            format!("$.{}", s)
1291                        };
1292                        Expression::Literal(Literal::String(normalized))
1293                    }
1294                    _ => path,
1295                };
1296                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1297                    this,
1298                    path: json_path,
1299                    returning: None,
1300                    arrow_syntax: false,
1301                    hash_arrow_syntax: false,
1302                    wrapper_option: None,
1303                    quotes_option: None,
1304                    on_scalar_string: false,
1305                    on_error: None,
1306                })))
1307            }
1308
1309            // Pass through everything else
1310            _ => Ok(Expression::Function(Box::new(f))),
1311        }
1312    }
1313
1314    fn transform_aggregate_function(
1315        &self,
1316        f: Box<crate::expressions::AggregateFunction>,
1317    ) -> Result<Expression> {
1318        let name_upper = f.name.to_uppercase();
1319        match name_upper.as_str() {
1320            // GROUP_CONCAT -> STRING_AGG
1321            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1322                Function::new("STRING_AGG".to_string(), f.args),
1323            ))),
1324
1325            // Pass through everything else
1326            _ => Ok(Expression::AggregateFunction(f)),
1327        }
1328    }
1329
1330    /// Transform MethodCall expressions
1331    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1332    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1333        use crate::expressions::MethodCall;
1334
1335        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1336        if let Expression::Column(ref col) = mc.this {
1337            if col.name.name.eq_ignore_ascii_case("SAFE") {
1338                let method_upper = mc.method.name.to_uppercase();
1339                if method_upper == "PARSE_DATE"
1340                    || method_upper == "PARSE_DATETIME"
1341                    || method_upper == "PARSE_TIMESTAMP"
1342                {
1343                    // Normalize the format string in the first argument
1344                    let args = self.normalize_time_format_args(mc.args);
1345                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1346                        this: mc.this,
1347                        method: mc.method,
1348                        args,
1349                    })));
1350                }
1351            }
1352        }
1353
1354        // Pass through all other method calls
1355        Ok(Expression::MethodCall(Box::new(mc)))
1356    }
1357
1358    /// Normalize time format strings in function arguments
1359    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1360    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1361        args.into_iter()
1362            .enumerate()
1363            .map(|(i, arg)| {
1364                // Only transform the first argument (the format string)
1365                if i == 0 {
1366                    if let Expression::Literal(Literal::String(s)) = arg {
1367                        let normalized = self.normalize_time_format(&s);
1368                        return Expression::Literal(Literal::String(normalized));
1369                    }
1370                }
1371                arg
1372            })
1373            .collect()
1374    }
1375
1376    /// Normalize a time format string according to BigQuery conventions
1377    /// %Y-%m-%d -> %F (ISO date)
1378    /// %H:%M:%S -> %T (time)
1379    fn normalize_time_format(&self, format: &str) -> String {
1380        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1381    }
1382}
1383
1384#[cfg(test)]
1385mod tests {
1386    use super::*;
1387    use crate::dialects::Dialect;
1388    use crate::parse_one;
1389
1390    fn transpile_to_bigquery(sql: &str) -> String {
1391        let dialect = Dialect::get(DialectType::Generic);
1392        let result = dialect
1393            .transpile_to(sql, DialectType::BigQuery)
1394            .expect("Transpile failed");
1395        result[0].clone()
1396    }
1397
1398    #[test]
1399    fn test_ifnull_identity() {
1400        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1401        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1402        assert!(
1403            result.contains("COALESCE"),
1404            "Expected COALESCE, got: {}",
1405            result
1406        );
1407    }
1408
1409    #[test]
1410    fn test_nvl_to_ifnull() {
1411        // NVL is converted to IFNULL in BigQuery
1412        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1413        assert!(
1414            result.contains("IFNULL"),
1415            "Expected IFNULL, got: {}",
1416            result
1417        );
1418    }
1419
1420    #[test]
1421    fn test_try_cast_to_safe_cast() {
1422        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1423        assert!(
1424            result.contains("SAFE_CAST"),
1425            "Expected SAFE_CAST, got: {}",
1426            result
1427        );
1428    }
1429
1430    #[test]
1431    fn test_random_to_rand() {
1432        let result = transpile_to_bigquery("SELECT RANDOM()");
1433        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1434    }
1435
1436    #[test]
1437    fn test_basic_select() {
1438        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1439        assert!(result.contains("SELECT"));
1440        assert!(result.contains("FROM users"));
1441    }
1442
1443    #[test]
1444    fn test_group_concat_to_string_agg() {
1445        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1446        assert!(
1447            result.contains("STRING_AGG"),
1448            "Expected STRING_AGG, got: {}",
1449            result
1450        );
1451    }
1452
1453    #[test]
1454    fn test_generate_series_to_generate_array() {
1455        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1456        assert!(
1457            result.contains("GENERATE_ARRAY"),
1458            "Expected GENERATE_ARRAY, got: {}",
1459            result
1460        );
1461    }
1462
1463    #[test]
1464    fn test_backtick_identifiers() {
1465        // BigQuery uses backticks for identifiers
1466        let dialect = BigQueryDialect;
1467        let config = dialect.generator_config();
1468        assert_eq!(config.identifier_quote, '`');
1469    }
1470
1471    fn bigquery_identity(sql: &str, expected: &str) {
1472        let dialect = Dialect::get(DialectType::BigQuery);
1473        let ast = dialect.parse(sql).expect("Parse failed");
1474        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1475        let result = dialect.generate(&transformed).expect("Generate failed");
1476        assert_eq!(result, expected, "SQL: {}", sql);
1477    }
1478
1479    #[test]
1480    fn test_cast_char_to_string() {
1481        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1482    }
1483
1484    #[test]
1485    fn test_cast_varchar_to_string() {
1486        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1487    }
1488
1489    #[test]
1490    fn test_cast_nchar_to_string() {
1491        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1492    }
1493
1494    #[test]
1495    fn test_cast_nvarchar_to_string() {
1496        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1497    }
1498
1499    #[test]
1500    fn test_cast_timestamptz_to_timestamp() {
1501        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1502    }
1503
1504    #[test]
1505    fn test_cast_record_to_struct() {
1506        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1507    }
1508
1509    #[test]
1510    fn test_json_literal_to_parse_json() {
1511        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1512        bigquery_identity(
1513            "SELECT JSON '\"foo\"' AS json_data",
1514            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1515        );
1516    }
1517
1518    #[test]
1519    fn test_grant_as_alias_not_quoted() {
1520        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1521        bigquery_identity(
1522            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1523            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1524        );
1525    }
1526
1527    #[test]
1528    fn test_timestamp_literal_to_cast() {
1529        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1530        bigquery_identity(
1531            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1532            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1533        );
1534    }
1535
1536    #[test]
1537    fn test_date_literal_to_cast_in_extract() {
1538        // Issue 1: DATE literal should become CAST syntax in BigQuery
1539        bigquery_identity(
1540            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1541            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1542        );
1543    }
1544
1545    #[test]
1546    fn test_json_object_with_json_literals() {
1547        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1548        bigquery_identity(
1549            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1550            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1551        );
1552    }
1553
1554    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1555    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1556    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1557
1558    #[test]
1559    fn test_safe_parse_date_format_normalization() {
1560        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1561        bigquery_identity(
1562            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1563            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1564        );
1565    }
1566
1567    #[test]
1568    fn test_safe_parse_datetime_format_normalization() {
1569        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1570        bigquery_identity(
1571            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1572            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1573        );
1574    }
1575
1576    #[test]
1577    fn test_safe_parse_timestamp_format_normalization() {
1578        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1579        bigquery_identity(
1580            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1581            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1582        );
1583    }
1584
1585    #[test]
1586    fn test_datetime_literal_to_cast() {
1587        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1588        bigquery_identity(
1589            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1590            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1591        );
1592    }
1593
1594    #[test]
1595    fn test_last_day_week_modifier_stripped() {
1596        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1597        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1598    }
1599
1600    #[test]
1601    fn test_hash_line_comment_parses() {
1602        // Regression test for issue #38:
1603        // BigQuery should accept # as a single-line comment.
1604        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1605        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1606    }
1607}