Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        config
52    }
53
54    fn generator_config(&self) -> GeneratorConfig {
55        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
56        GeneratorConfig {
57            identifier_quote: '`',
58            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
59            dialect: Some(DialectType::BigQuery),
60            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
61            normalize_functions: NormalizeFunctions::None,
62            // BigQuery-specific settings from Python sqlglot
63            interval_allows_plural_form: false,
64            join_hints: false,
65            query_hints: false,
66            table_hints: false,
67            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
68            rename_table_with_db: false,
69            nvl2_supported: false,
70            unnest_with_ordinality: false,
71            collate_is_func: true,
72            limit_only_literals: true,
73            supports_table_alias_columns: false,
74            unpivot_aliases_are_identifiers: false,
75            json_key_value_pair_sep: ",",
76            null_ordering_supported: false,
77            ignore_nulls_in_func: true,
78            json_path_single_quote_escape: true,
79            can_implement_array_any: true,
80            supports_to_number: false,
81            named_placeholder_token: "@",
82            hex_func: "TO_HEX",
83            with_properties_prefix: "OPTIONS",
84            supports_exploding_projections: false,
85            except_intersect_support_all_clause: false,
86            supports_unix_seconds: true,
87            // BigQuery uses SAFE_ prefix for safe operations
88            try_supported: true,
89            // BigQuery does not support SEMI/ANTI JOIN syntax
90            semi_anti_join_with_side: false,
91            ..Default::default()
92        }
93    }
94
95    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
96        match expr {
97            // ===== Data Type Mappings =====
98            Expression::DataType(dt) => self.transform_data_type(dt),
99
100            // ===== Null handling =====
101            // IFNULL is native to BigQuery - keep as-is for identity
102            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
103
104            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
105            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
106
107            // Coalesce stays as Coalesce
108            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
109
110            // ===== String aggregation =====
111            // GROUP_CONCAT -> STRING_AGG in BigQuery
112            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
113                this: f.this,
114                separator: f.separator,
115                order_by: f.order_by,
116                distinct: f.distinct,
117                filter: f.filter,
118                limit: None,
119            }))),
120
121            // ===== Cast operations =====
122            // Cast data types are transformed by transform_recursive in mod.rs
123            // which calls transform_data_type via transform_expr(Expression::DataType(...))
124
125            // TryCast -> SafeCast in BigQuery with type transformation
126            Expression::TryCast(c) => {
127                let transformed_type = match self.transform_data_type(c.to)? {
128                    Expression::DataType(dt) => dt,
129                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
130                };
131                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
132                    this: c.this,
133                    to: transformed_type,
134                    trailing_comments: c.trailing_comments,
135                    double_colon_syntax: c.double_colon_syntax,
136                    format: c.format,
137                    default: c.default,
138                })))
139            }
140
141            // ===== Pattern matching =====
142            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
143            Expression::ILike(op) => {
144                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
145                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
146                Ok(Expression::Like(Box::new(LikeOp {
147                    left: lower_left,
148                    right: lower_right,
149                    escape: op.escape,
150                    quantifier: op.quantifier,
151                })))
152            }
153
154            // RegexpLike -> REGEXP_CONTAINS in BigQuery
155            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
156                "REGEXP_CONTAINS".to_string(),
157                vec![f.this, f.pattern],
158            )))),
159
160            // ===== Array operations =====
161            // EXPLODE -> UNNEST in BigQuery
162            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
163                crate::expressions::UnnestFunc {
164                    this: f.this,
165                    expressions: Vec::new(),
166                    with_ordinality: false,
167                    alias: None,
168                    offset_alias: None,
169                },
170            ))),
171
172            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
173            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
174                crate::expressions::UnnestFunc {
175                    this: f.this,
176                    expressions: Vec::new(),
177                    with_ordinality: false,
178                    alias: None,
179                    offset_alias: None,
180                },
181            ))),
182
183            // GenerateSeries -> GENERATE_ARRAY in BigQuery
184            Expression::GenerateSeries(f) => {
185                let mut args = Vec::new();
186                if let Some(start) = f.start {
187                    args.push(*start);
188                }
189                if let Some(end) = f.end {
190                    args.push(*end);
191                }
192                if let Some(step) = f.step {
193                    args.push(*step);
194                }
195                Ok(Expression::Function(Box::new(Function::new(
196                    "GENERATE_ARRAY".to_string(),
197                    args,
198                ))))
199            }
200
201            // ===== Bitwise operations =====
202            // BitwiseAndAgg -> BIT_AND
203            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
204                "BIT_AND".to_string(),
205                vec![f.this],
206            )))),
207
208            // BitwiseOrAgg -> BIT_OR
209            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
210                "BIT_OR".to_string(),
211                vec![f.this],
212            )))),
213
214            // BitwiseXorAgg -> BIT_XOR
215            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
216                "BIT_XOR".to_string(),
217                vec![f.this],
218            )))),
219
220            // BitwiseCount -> BIT_COUNT
221            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
222                "BIT_COUNT".to_string(),
223                vec![f.this],
224            )))),
225
226            // ByteLength -> BYTE_LENGTH
227            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
228                "BYTE_LENGTH".to_string(),
229                vec![f.this],
230            )))),
231
232            // IntDiv -> DIV
233            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
234                "DIV".to_string(),
235                vec![f.this, f.expression],
236            )))),
237
238            // Int64 -> INT64
239            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
240                "INT64".to_string(),
241                vec![f.this],
242            )))),
243
244            // ===== Random =====
245            // RANDOM -> RAND in BigQuery
246            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
247                seed: None,
248                lower: None,
249                upper: None,
250            }))),
251
252            // ===== UUID =====
253            // Uuid -> GENERATE_UUID in BigQuery
254            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
255                "GENERATE_UUID".to_string(),
256                vec![],
257            )))),
258
259            // ===== Approximate functions =====
260            // ApproxDistinct -> APPROX_COUNT_DISTINCT
261            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
262                "APPROX_COUNT_DISTINCT".to_string(),
263                vec![f.this],
264            )))),
265
266            // ArgMax -> MAX_BY in BigQuery
267            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
268                "MAX_BY".to_string(),
269                vec![*f.this, *f.expression],
270            )))),
271
272            // ArgMin -> MIN_BY in BigQuery
273            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
274                "MIN_BY".to_string(),
275                vec![*f.this, *f.expression],
276            )))),
277
278            // ===== Conditional =====
279            // CountIf -> COUNTIF in BigQuery
280            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
281                "COUNTIF".to_string(),
282                vec![f.this],
283            )))),
284
285            // ===== String functions =====
286            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
287            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
288
289            // ===== Conversion =====
290            // Unhex -> FROM_HEX
291            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
292                "FROM_HEX".to_string(),
293                vec![*f.this],
294            )))),
295
296            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
297            Expression::UnixToTime(f) => {
298                let scale = f.scale.unwrap_or(0);
299                match scale {
300                    0 => Ok(Expression::Function(Box::new(Function::new(
301                        "TIMESTAMP_SECONDS".to_string(),
302                        vec![*f.this],
303                    )))),
304                    3 => Ok(Expression::Function(Box::new(Function::new(
305                        "TIMESTAMP_MILLIS".to_string(),
306                        vec![*f.this],
307                    )))),
308                    6 => Ok(Expression::Function(Box::new(Function::new(
309                        "TIMESTAMP_MICROS".to_string(),
310                        vec![*f.this],
311                    )))),
312                    _ => {
313                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
314                        let div_expr =
315                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
316                                *f.this,
317                                Expression::Function(Box::new(Function::new(
318                                    "POWER".to_string(),
319                                    vec![Expression::number(10), Expression::number(scale)],
320                                ))),
321                            )));
322                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
323                            this: div_expr,
324                            to: crate::expressions::DataType::Custom {
325                                name: "INT64".to_string(),
326                            },
327                            double_colon_syntax: false,
328                            trailing_comments: vec![],
329                            format: None,
330                            default: None,
331                        }));
332                        Ok(Expression::Function(Box::new(Function::new(
333                            "TIMESTAMP_SECONDS".to_string(),
334                            vec![cast_expr],
335                        ))))
336                    }
337                }
338            }
339
340            // ===== Date/time =====
341            // DateDiff -> DATE_DIFF in BigQuery
342            Expression::DateDiff(f) => {
343                // BigQuery: DATE_DIFF(date1, date2, part)
344                let unit_str = match f.unit {
345                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
346                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
347                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
348                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
349                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
350                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
351                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
352                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
353                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
354                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
355                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
356                    None => "DAY",
357                };
358                let unit = Expression::Identifier(crate::expressions::Identifier {
359                    name: unit_str.to_string(),
360                    quoted: false,
361                    trailing_comments: Vec::new(),
362                    span: None,
363                });
364                Ok(Expression::Function(Box::new(Function::new(
365                    "DATE_DIFF".to_string(),
366                    vec![f.this, f.expression, unit],
367                ))))
368            }
369
370            // ===== Variance =====
371            // VarPop -> VAR_POP
372            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
373                "VAR_POP".to_string(),
374                vec![f.this],
375            )))),
376
377            // ===== Hash functions =====
378            // SHA -> SHA1
379            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
380                "SHA1".to_string(),
381                vec![f.this],
382            )))),
383
384            // SHA1Digest -> SHA1
385            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
386                "SHA1".to_string(),
387                vec![f.this],
388            )))),
389
390            // MD5Digest -> MD5
391            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
392                "MD5".to_string(),
393                vec![*f.this],
394            )))),
395
396            // ===== Type conversion =====
397            // JSONBool -> BOOL
398            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
399                "BOOL".to_string(),
400                vec![f.this],
401            )))),
402
403            // StringFunc -> STRING
404            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
405                "STRING".to_string(),
406                vec![*f.this],
407            )))),
408
409            // ===== Date/time from parts =====
410            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
411            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
412                "DATE_FROM_UNIX_DATE".to_string(),
413                vec![f.this],
414            )))),
415
416            // UnixDate -> UNIX_DATE
417            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
418                "UNIX_DATE".to_string(),
419                vec![f.this],
420            )))),
421
422            // TimestampDiff -> TIMESTAMP_DIFF
423            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
424                "TIMESTAMP_DIFF".to_string(),
425                vec![*f.this, *f.expression],
426            )))),
427
428            // FromTimeZone -> DATETIME
429            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
430                "DATETIME".to_string(),
431                vec![*f.this],
432            )))),
433
434            // TsOrDsToDatetime -> DATETIME
435            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
436                "DATETIME".to_string(),
437                vec![f.this],
438            )))),
439
440            // TsOrDsToTimestamp -> TIMESTAMP
441            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
442                "TIMESTAMP".to_string(),
443                vec![f.this],
444            )))),
445
446            // ===== IfFunc -> IF in BigQuery =====
447            Expression::IfFunc(f) => {
448                let mut args = vec![f.condition, f.true_value];
449                if let Some(false_val) = f.false_value {
450                    args.push(false_val);
451                } else {
452                    args.push(Expression::Null(crate::expressions::Null));
453                }
454                Ok(Expression::Function(Box::new(Function::new(
455                    "IF".to_string(),
456                    args,
457                ))))
458            }
459
460            // ===== HexString -> FROM_HEX =====
461            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
462                "FROM_HEX".to_string(),
463                vec![*f.this],
464            )))),
465
466            // ===== Additional auto-generated transforms from Python sqlglot =====
467            // ApproxTopK -> APPROX_TOP_COUNT
468            Expression::ApproxTopK(f) => {
469                let mut args = vec![*f.this];
470                if let Some(expr) = f.expression {
471                    args.push(*expr);
472                }
473                Ok(Expression::Function(Box::new(Function::new(
474                    "APPROX_TOP_COUNT".to_string(),
475                    args,
476                ))))
477            }
478
479            // SafeDivide -> SAFE_DIVIDE
480            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
481                "SAFE_DIVIDE".to_string(),
482                vec![*f.this, *f.expression],
483            )))),
484
485            // JSONKeysAtDepth -> JSON_KEYS
486            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
487                "JSON_KEYS".to_string(),
488                vec![*f.this],
489            )))),
490
491            // JSONValueArray -> JSON_VALUE_ARRAY
492            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
493                "JSON_VALUE_ARRAY".to_string(),
494                vec![*f.this],
495            )))),
496
497            // DateFromParts -> DATE
498            Expression::DateFromParts(f) => {
499                let mut args = Vec::new();
500                if let Some(y) = f.year {
501                    args.push(*y);
502                }
503                if let Some(m) = f.month {
504                    args.push(*m);
505                }
506                if let Some(d) = f.day {
507                    args.push(*d);
508                }
509                Ok(Expression::Function(Box::new(Function::new(
510                    "DATE".to_string(),
511                    args,
512                ))))
513            }
514
515            // SPLIT: BigQuery defaults to comma separator when none provided
516            // SPLIT(foo) -> SPLIT(foo, ',')
517            Expression::Split(f) => {
518                // Check if delimiter is empty or a placeholder - add default comma
519                let delimiter = match &f.delimiter {
520                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
521                        Expression::Literal(Literal::String(",".to_string()))
522                    }
523                    _ => f.delimiter,
524                };
525                Ok(Expression::Split(Box::new(SplitFunc {
526                    this: f.this,
527                    delimiter,
528                })))
529            }
530
531            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
532            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
533            Expression::Cast(c) => {
534                use crate::expressions::DataType;
535                // Check if casting to JSON - use PARSE_JSON instead
536                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
537                // (parser creates Custom for type literals like JSON 'string')
538                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
539                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
540                if is_json {
541                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
542                }
543                let transformed_type = match self.transform_data_type(c.to)? {
544                    Expression::DataType(dt) => dt,
545                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
546                };
547                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
548                    this: c.this,
549                    to: transformed_type,
550                    trailing_comments: c.trailing_comments,
551                    double_colon_syntax: c.double_colon_syntax,
552                    format: c.format,
553                    default: c.default,
554                })))
555            }
556
557            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
558            Expression::SafeCast(c) => {
559                let transformed_type = match self.transform_data_type(c.to)? {
560                    Expression::DataType(dt) => dt,
561                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
562                };
563                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
564                    this: c.this,
565                    to: transformed_type,
566                    trailing_comments: c.trailing_comments,
567                    double_colon_syntax: c.double_colon_syntax,
568                    format: c.format,
569                    default: c.default,
570                })))
571            }
572
573            // ===== SELECT-level transforms =====
574            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
575            Expression::Select(mut select) => {
576                if select.group_by.is_some() && select.order_by.is_some() {
577                    // Build map: expression → alias name for aliased projections
578                    let aliases: Vec<(Expression, Identifier)> = select
579                        .expressions
580                        .iter()
581                        .filter_map(|e| {
582                            if let Expression::Alias(a) = e {
583                                Some((a.this.clone(), a.alias.clone()))
584                            } else {
585                                None
586                            }
587                        })
588                        .collect();
589
590                    if let Some(ref mut group_by) = select.group_by {
591                        for grouped in group_by.expressions.iter_mut() {
592                            // Skip numeric indices (already aliased)
593                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
594                                continue;
595                            }
596                            // Check if this GROUP BY expression matches a SELECT alias
597                            for (expr, alias_ident) in &aliases {
598                                if grouped == expr {
599                                    *grouped = Expression::Column(Column {
600                                        name: alias_ident.clone(),
601                                        table: None,
602                                        join_mark: false,
603                                        trailing_comments: Vec::new(),
604                                        span: None,
605                                    });
606                                    break;
607                                }
608                            }
609                        }
610                    }
611                }
612                Ok(Expression::Select(select))
613            }
614
615            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
616            Expression::ArrayContains(f) => {
617                let array_expr = f.this;
618                let value_expr = f.expression;
619
620                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
621                let unnest = Expression::Unnest(Box::new(UnnestFunc {
622                    this: array_expr,
623                    expressions: Vec::new(),
624                    with_ordinality: false,
625                    alias: None,
626                    offset_alias: None,
627                }));
628                let aliased_unnest = Expression::Alias(Box::new(Alias {
629                    this: unnest,
630                    alias: Identifier::new("_col"),
631                    column_aliases: Vec::new(),
632                    pre_alias_comments: Vec::new(),
633                    trailing_comments: Vec::new(),
634                }));
635                let col_ref = Expression::Column(Column {
636                    name: Identifier::new("_col"),
637                    table: None,
638                    join_mark: false,
639                    trailing_comments: Vec::new(),
640                    span: None,
641                });
642                let where_clause = Where {
643                    this: Expression::Eq(Box::new(BinaryOp {
644                        left: col_ref,
645                        right: value_expr,
646                        left_comments: Vec::new(),
647                        operator_comments: Vec::new(),
648                        trailing_comments: Vec::new(),
649                    })),
650                };
651                let inner_select = Expression::Select(Box::new(Select {
652                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
653                    from: Some(From {
654                        expressions: vec![aliased_unnest],
655                    }),
656                    where_clause: Some(where_clause),
657                    ..Default::default()
658                }));
659                Ok(Expression::Exists(Box::new(Exists {
660                    this: inner_select,
661                    not: false,
662                })))
663            }
664
665            // ===== JSON_OBJECT array form → key-value pairs =====
666            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
667            Expression::JsonObject(mut f) => {
668                if f.pairs.len() == 1 {
669                    // Extract expressions from both Array and ArrayFunc variants
670                    let keys_exprs = match &f.pairs[0].0 {
671                        Expression::Array(arr) => Some(&arr.expressions),
672                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
673                        _ => None,
674                    };
675                    let vals_exprs = match &f.pairs[0].1 {
676                        Expression::Array(arr) => Some(&arr.expressions),
677                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
678                        _ => None,
679                    };
680                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
681                        if keys.len() == vals.len() {
682                            let new_pairs: Vec<(Expression, Expression)> = keys
683                                .iter()
684                                .zip(vals.iter())
685                                .map(|(k, v)| (k.clone(), v.clone()))
686                                .collect();
687                            f.pairs = new_pairs;
688                        }
689                    }
690                }
691                Ok(Expression::JsonObject(f))
692            }
693
694            // ===== MOD function: unwrap unnecessary Paren from first argument =====
695            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
696            Expression::ModFunc(mut f) => {
697                // Unwrap Paren from first argument if present
698                if let Expression::Paren(paren) = f.this {
699                    f.this = paren.this;
700                }
701                Ok(Expression::ModFunc(f))
702            }
703
704            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
705            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
706                let path = match *e.expression {
707                    Expression::Literal(Literal::String(s)) => {
708                        let normalized = if s.starts_with('$') {
709                            s
710                        } else if s.starts_with('[') {
711                            format!("${}", s)
712                        } else {
713                            format!("$.{}", s)
714                        };
715                        Expression::Literal(Literal::String(normalized))
716                    }
717                    other => other,
718                };
719                Ok(Expression::Function(Box::new(Function::new(
720                    "JSON_EXTRACT".to_string(),
721                    vec![*e.this, path],
722                ))))
723            }
724
725            // Generic function transformations
726            Expression::Function(f) => self.transform_function(*f),
727
728            // Generic aggregate function transformations
729            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
730
731            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
732            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
733            Expression::MethodCall(mc) => self.transform_method_call(*mc),
734
735            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
736            // and set is_table_function = true for TABLE FUNCTION syntax
737            Expression::CreateFunction(mut cf) => {
738                if let Some(ref mut rtb) = cf.returns_table_body {
739                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
740                        // Convert TABLE (...) to TABLE <...> with BigQuery types
741                        let inner = if rtb.starts_with("TABLE (") {
742                            &rtb["TABLE (".len()..rtb.len() - 1]
743                        } else {
744                            &rtb["TABLE(".len()..rtb.len() - 1]
745                        };
746                        // Convert common types to BigQuery equivalents
747                        let converted = inner
748                            .replace(" INT,", " INT64,")
749                            .replace(" INT)", " INT64)")
750                            .replace(" INTEGER,", " INT64,")
751                            .replace(" INTEGER)", " INT64)")
752                            .replace(" FLOAT,", " FLOAT64,")
753                            .replace(" FLOAT)", " FLOAT64)")
754                            .replace(" BOOLEAN,", " BOOL,")
755                            .replace(" BOOLEAN)", " BOOL)")
756                            .replace(" VARCHAR", " STRING")
757                            .replace(" TEXT", " STRING");
758                        // Handle trailing type (no comma, no paren)
759                        let converted = if converted.ends_with(" INT") {
760                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
761                        } else {
762                            converted
763                        };
764                        *rtb = format!("TABLE <{}>", converted);
765                        cf.is_table_function = true;
766                    }
767                }
768                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
769                if cf.is_table_function {
770                    if let Some(ref body) = cf.body {
771                        if matches!(body, FunctionBody::StringLiteral(_)) {
772                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
773                                // Parse the SQL string into an expression
774                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
775                                    if let Some(stmt) = parsed.into_iter().next() {
776                                        cf.body = Some(FunctionBody::Expression(stmt));
777                                    } else {
778                                        cf.body = Some(FunctionBody::StringLiteral(sql));
779                                    }
780                                } else {
781                                    cf.body = Some(FunctionBody::StringLiteral(sql));
782                                }
783                            }
784                        }
785                    }
786                }
787                Ok(Expression::CreateFunction(cf))
788            }
789
790            // Pass through everything else
791            _ => Ok(expr),
792        }
793    }
794}
795
796impl BigQueryDialect {
797    /// Transform data types according to BigQuery TYPE_MAPPING
798    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
799        use crate::expressions::DataType;
800        let transformed = match dt {
801            // BIGINT -> INT64
802            DataType::BigInt { .. } => DataType::Custom {
803                name: "INT64".to_string(),
804            },
805            // INT -> INT64
806            DataType::Int { .. } => DataType::Custom {
807                name: "INT64".to_string(),
808            },
809            // SMALLINT -> INT64
810            DataType::SmallInt { .. } => DataType::Custom {
811                name: "INT64".to_string(),
812            },
813            // TINYINT -> INT64
814            DataType::TinyInt { .. } => DataType::Custom {
815                name: "INT64".to_string(),
816            },
817            // FLOAT -> FLOAT64
818            DataType::Float { .. } => DataType::Custom {
819                name: "FLOAT64".to_string(),
820            },
821            // DOUBLE -> FLOAT64
822            DataType::Double { .. } => DataType::Custom {
823                name: "FLOAT64".to_string(),
824            },
825            // BOOLEAN -> BOOL
826            DataType::Boolean => DataType::Custom {
827                name: "BOOL".to_string(),
828            },
829            // CHAR -> STRING
830            DataType::Char { .. } => DataType::Custom {
831                name: "STRING".to_string(),
832            },
833            // VARCHAR -> STRING
834            DataType::VarChar { .. } => DataType::Custom {
835                name: "STRING".to_string(),
836            },
837            // TEXT -> STRING
838            DataType::Text => DataType::Custom {
839                name: "STRING".to_string(),
840            },
841            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
842            DataType::String { .. } => DataType::Custom {
843                name: "STRING".to_string(),
844            },
845            // BINARY -> BYTES
846            DataType::Binary { .. } => DataType::Custom {
847                name: "BYTES".to_string(),
848            },
849            // VARBINARY -> BYTES
850            DataType::VarBinary { .. } => DataType::Custom {
851                name: "BYTES".to_string(),
852            },
853            // BLOB -> BYTES
854            DataType::Blob => DataType::Custom {
855                name: "BYTES".to_string(),
856            },
857            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
858            DataType::Decimal { .. } => DataType::Custom {
859                name: "NUMERIC".to_string(),
860            },
861            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
862            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
863            // be converted to DATETIME by the generator
864            DataType::Timestamp {
865                timezone: false, ..
866            } => DataType::Custom {
867                name: "TIMESTAMP".to_string(),
868            },
869            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
870                name: "TIMESTAMP".to_string(),
871            },
872            // UUID -> STRING (BigQuery doesn't have native UUID type)
873            DataType::Uuid => DataType::Custom {
874                name: "STRING".to_string(),
875            },
876            // RECORD -> STRUCT in BigQuery
877            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
878                DataType::Custom {
879                    name: "STRUCT".to_string(),
880                }
881            }
882            // TIMESTAMPTZ (custom) -> TIMESTAMP
883            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
884                DataType::Custom {
885                    name: "TIMESTAMP".to_string(),
886                }
887            }
888            // BYTEINT (custom) -> INT64
889            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
890                DataType::Custom {
891                    name: "INT64".to_string(),
892                }
893            }
894            // Keep all other types as-is
895            other => other,
896        };
897        Ok(Expression::DataType(transformed))
898    }
899
900    fn transform_function(&self, f: Function) -> Result<Expression> {
901        let name_upper = f.name.to_uppercase();
902        match name_upper.as_str() {
903            // IFNULL -> COALESCE (both work in BigQuery)
904            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
905                original_name: None,
906                expressions: f.args,
907            }))),
908
909            // NVL -> COALESCE
910            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
911                original_name: None,
912                expressions: f.args,
913            }))),
914
915            // ISNULL -> COALESCE
916            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
917                original_name: None,
918                expressions: f.args,
919            }))),
920
921            // GROUP_CONCAT -> STRING_AGG in BigQuery
922            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
923                Function::new("STRING_AGG".to_string(), f.args),
924            ))),
925
926            // SUBSTR -> SUBSTRING (both work)
927            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
928                "SUBSTRING".to_string(),
929                f.args,
930            )))),
931
932            // RANDOM -> RAND
933            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
934                seed: None,
935                lower: None,
936                upper: None,
937            }))),
938
939            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
940            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
941            "CURRENT_DATE" if f.args.is_empty() => {
942                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
943            }
944            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
945                name: "CURRENT_DATE".to_string(),
946                args: f.args,
947                distinct: false,
948                trailing_comments: Vec::new(),
949                use_bracket_syntax: false,
950                no_parens: false,
951                quoted: false,
952                span: None,
953            }))),
954
955            // NOW -> CURRENT_TIMESTAMP in BigQuery
956            "NOW" => Ok(Expression::CurrentTimestamp(
957                crate::expressions::CurrentTimestamp {
958                    precision: None,
959                    sysdate: false,
960                },
961            )),
962
963            // TO_DATE -> PARSE_DATE in BigQuery
964            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
965                "PARSE_DATE".to_string(),
966                f.args,
967            )))),
968
969            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
970            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
971                "PARSE_TIMESTAMP".to_string(),
972                f.args,
973            )))),
974
975            // TO_TIME -> TIME in BigQuery
976            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
977                "TIME".to_string(),
978                f.args,
979            )))),
980
981            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
982            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
983                "FORMAT_DATE".to_string(),
984                f.args,
985            )))),
986
987            // POSITION -> STRPOS in BigQuery
988            // BigQuery uses STRPOS(string, substring)
989            "POSITION" if f.args.len() == 2 => {
990                let mut args = f.args;
991                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
992                let first = args.remove(0);
993                let second = args.remove(0);
994                Ok(Expression::Function(Box::new(Function::new(
995                    "STRPOS".to_string(),
996                    vec![second, first],
997                ))))
998            }
999
1000            // LEN -> LENGTH
1001            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1002                f.args.into_iter().next().unwrap(),
1003            )))),
1004
1005            // CEILING -> CEIL (both work)
1006            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1007                this: f.args.into_iter().next().unwrap(),
1008                decimals: None,
1009                to: None,
1010            }))),
1011
1012            // GETDATE -> CURRENT_TIMESTAMP
1013            "GETDATE" => Ok(Expression::CurrentTimestamp(
1014                crate::expressions::CurrentTimestamp {
1015                    precision: None,
1016                    sysdate: false,
1017                },
1018            )),
1019
1020            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1021            // CARDINALITY -> ARRAY_LENGTH
1022            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1023                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1024            ))),
1025
1026            // UNNEST is native to BigQuery
1027
1028            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1029            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1030                "GENERATE_ARRAY".to_string(),
1031                f.args,
1032            )))),
1033
1034            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1035            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1036            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1037                "APPROX_COUNT_DISTINCT".to_string(),
1038                f.args,
1039            )))),
1040
1041            // COUNT_IF -> COUNTIF in BigQuery
1042            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1043                "COUNTIF".to_string(),
1044                f.args,
1045            )))),
1046
1047            // SHA1 -> SHA1 (native), SHA -> SHA1
1048            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1049                "SHA1".to_string(),
1050                f.args,
1051            )))),
1052
1053            // SHA256/SHA2 -> SHA256
1054            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1055                "SHA256".to_string(),
1056                f.args,
1057            )))),
1058
1059            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1060            // TO_HEX(MD5(x)) pattern
1061            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1062                "MD5".to_string(),
1063                f.args,
1064            )))),
1065
1066            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1067            // VAR_POP -> VAR_POP (native)
1068
1069            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1070            "DATEADD" if f.args.len() == 3 => {
1071                let mut args = f.args;
1072                let unit_expr = args.remove(0);
1073                let amount = args.remove(0);
1074                let date = args.remove(0);
1075                // Convert unit identifier to IntervalUnit
1076                let unit_name = match &unit_expr {
1077                    Expression::Identifier(id) => id.name.to_uppercase(),
1078                    _ => "DAY".to_string(),
1079                };
1080                let unit = match unit_name.as_str() {
1081                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1082                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1083                        crate::expressions::IntervalUnit::Quarter
1084                    }
1085                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1086                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1087                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1088                        crate::expressions::IntervalUnit::Day
1089                    }
1090                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1091                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1092                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1093                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1094                        crate::expressions::IntervalUnit::Millisecond
1095                    }
1096                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1097                        crate::expressions::IntervalUnit::Microsecond
1098                    }
1099                    _ => crate::expressions::IntervalUnit::Day,
1100                };
1101                Ok(Expression::DateAdd(Box::new(
1102                    crate::expressions::DateAddFunc {
1103                        this: date,
1104                        interval: amount,
1105                        unit,
1106                    },
1107                )))
1108            }
1109            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1110                "DATE_ADD".to_string(),
1111                f.args,
1112            )))),
1113
1114            // DATE_DIFF in BigQuery (native)
1115            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1116                "DATE_DIFF".to_string(),
1117                f.args,
1118            )))),
1119
1120            // TIMESTAMP_DIFF in BigQuery
1121            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1122                "TIMESTAMP_DIFF".to_string(),
1123                f.args,
1124            )))),
1125
1126            // TIME -> TIME (native)
1127            // DATETIME -> DATETIME (native)
1128
1129            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1130
1131            // NEWID/UUID -> GENERATE_UUID
1132            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1133                "GENERATE_UUID".to_string(),
1134                vec![],
1135            )))),
1136
1137            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1138            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1139                "EDIT_DISTANCE".to_string(),
1140                f.args,
1141            )))),
1142
1143            // UNIX_TIMESTAMP -> UNIX_SECONDS
1144            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1145                "UNIX_SECONDS".to_string(),
1146                f.args,
1147            )))),
1148
1149            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1150            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1151                "TIMESTAMP_SECONDS".to_string(),
1152                f.args,
1153            )))),
1154
1155            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1156            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1157                Function::new("LENGTH".to_string(), f.args),
1158            ))),
1159
1160            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1161            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1162                "BYTE_LENGTH".to_string(),
1163                f.args,
1164            )))),
1165
1166            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1167            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1168                "JSON_VALUE_ARRAY".to_string(),
1169                f.args,
1170            )))),
1171
1172            // INSTR is native to BigQuery
1173
1174            // SPLIT: BigQuery defaults to comma separator when none provided
1175            // SPLIT(foo) -> SPLIT(foo, ',')
1176            "SPLIT" if f.args.len() == 1 => {
1177                let mut args = f.args;
1178                args.push(Expression::Literal(Literal::String(",".to_string())));
1179                Ok(Expression::Split(Box::new(SplitFunc {
1180                    this: args.remove(0),
1181                    delimiter: args.remove(0),
1182                })))
1183            }
1184
1185            // SPLIT with two args - convert to Split expression
1186            "SPLIT" if f.args.len() == 2 => {
1187                let mut args = f.args;
1188                Ok(Expression::Split(Box::new(SplitFunc {
1189                    this: args.remove(0),
1190                    delimiter: args.remove(0),
1191                })))
1192            }
1193
1194            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1195            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1196                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1197                let args = if f.args.len() > 4 {
1198                    f.args[..4].to_vec()
1199                } else {
1200                    f.args
1201                };
1202                Ok(Expression::Function(Box::new(Function::new(
1203                    "REGEXP_EXTRACT".to_string(),
1204                    args,
1205                ))))
1206            }
1207            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1208                "REGEXP_EXTRACT".to_string(),
1209                f.args,
1210            )))),
1211
1212            // REGEXP_REPLACE - strip extra Snowflake-specific args
1213            "REGEXP_REPLACE" if f.args.len() > 3 => {
1214                let args = f.args[..3].to_vec();
1215                Ok(Expression::Function(Box::new(Function::new(
1216                    "REGEXP_REPLACE".to_string(),
1217                    args,
1218                ))))
1219            }
1220
1221            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1222            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1223                "JSON_OBJECT".to_string(),
1224                f.args,
1225            )))),
1226
1227            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1228            "EDITDISTANCE" if f.args.len() == 3 => {
1229                let col1 = f.args[0].clone();
1230                let col2 = f.args[1].clone();
1231                let max_dist = f.args[2].clone();
1232                Ok(Expression::Function(Box::new(Function::new(
1233                    "EDIT_DISTANCE".to_string(),
1234                    vec![
1235                        col1,
1236                        col2,
1237                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1238                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1239                            value: max_dist,
1240                            separator: crate::expressions::NamedArgSeparator::DArrow,
1241                        })),
1242                    ],
1243                ))))
1244            }
1245            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1246                Function::new("EDIT_DISTANCE".to_string(), f.args),
1247            ))),
1248
1249            // HEX_DECODE_BINARY -> FROM_HEX
1250            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1251                "FROM_HEX".to_string(),
1252                f.args,
1253            )))),
1254
1255            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1256            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1257            "PARSE_DATE"
1258            | "PARSE_DATETIME"
1259            | "PARSE_TIMESTAMP"
1260            | "SAFE.PARSE_DATE"
1261            | "SAFE.PARSE_DATETIME"
1262            | "SAFE.PARSE_TIMESTAMP" => {
1263                let args = self.normalize_time_format_args(f.args);
1264                Ok(Expression::Function(Box::new(Function {
1265                    name: f.name,
1266                    args,
1267                    distinct: f.distinct,
1268                    no_parens: f.no_parens,
1269                    trailing_comments: f.trailing_comments,
1270                    quoted: f.quoted,
1271                    use_bracket_syntax: f.use_bracket_syntax,
1272                    span: None,
1273                })))
1274            }
1275
1276            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1277            "GET_PATH" if f.args.len() == 2 => {
1278                let mut args = f.args;
1279                let this = args.remove(0);
1280                let path = args.remove(0);
1281                let json_path = match &path {
1282                    Expression::Literal(Literal::String(s)) => {
1283                        let normalized = if s.starts_with('$') {
1284                            s.clone()
1285                        } else if s.starts_with('[') {
1286                            format!("${}", s)
1287                        } else {
1288                            format!("$.{}", s)
1289                        };
1290                        Expression::Literal(Literal::String(normalized))
1291                    }
1292                    _ => path,
1293                };
1294                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1295                    this,
1296                    path: json_path,
1297                    returning: None,
1298                    arrow_syntax: false,
1299                    hash_arrow_syntax: false,
1300                    wrapper_option: None,
1301                    quotes_option: None,
1302                    on_scalar_string: false,
1303                    on_error: None,
1304                })))
1305            }
1306
1307            // Pass through everything else
1308            _ => Ok(Expression::Function(Box::new(f))),
1309        }
1310    }
1311
1312    fn transform_aggregate_function(
1313        &self,
1314        f: Box<crate::expressions::AggregateFunction>,
1315    ) -> Result<Expression> {
1316        let name_upper = f.name.to_uppercase();
1317        match name_upper.as_str() {
1318            // GROUP_CONCAT -> STRING_AGG
1319            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1320                Function::new("STRING_AGG".to_string(), f.args),
1321            ))),
1322
1323            // Pass through everything else
1324            _ => Ok(Expression::AggregateFunction(f)),
1325        }
1326    }
1327
1328    /// Transform MethodCall expressions
1329    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1330    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1331        use crate::expressions::MethodCall;
1332
1333        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1334        if let Expression::Column(ref col) = mc.this {
1335            if col.name.name.eq_ignore_ascii_case("SAFE") {
1336                let method_upper = mc.method.name.to_uppercase();
1337                if method_upper == "PARSE_DATE"
1338                    || method_upper == "PARSE_DATETIME"
1339                    || method_upper == "PARSE_TIMESTAMP"
1340                {
1341                    // Normalize the format string in the first argument
1342                    let args = self.normalize_time_format_args(mc.args);
1343                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1344                        this: mc.this,
1345                        method: mc.method,
1346                        args,
1347                    })));
1348                }
1349            }
1350        }
1351
1352        // Pass through all other method calls
1353        Ok(Expression::MethodCall(Box::new(mc)))
1354    }
1355
1356    /// Normalize time format strings in function arguments
1357    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1358    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1359        args.into_iter()
1360            .enumerate()
1361            .map(|(i, arg)| {
1362                // Only transform the first argument (the format string)
1363                if i == 0 {
1364                    if let Expression::Literal(Literal::String(s)) = arg {
1365                        let normalized = self.normalize_time_format(&s);
1366                        return Expression::Literal(Literal::String(normalized));
1367                    }
1368                }
1369                arg
1370            })
1371            .collect()
1372    }
1373
1374    /// Normalize a time format string according to BigQuery conventions
1375    /// %Y-%m-%d -> %F (ISO date)
1376    /// %H:%M:%S -> %T (time)
1377    fn normalize_time_format(&self, format: &str) -> String {
1378        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1379    }
1380}
1381
1382#[cfg(test)]
1383mod tests {
1384    use super::*;
1385    use crate::dialects::Dialect;
1386
1387    fn transpile_to_bigquery(sql: &str) -> String {
1388        let dialect = Dialect::get(DialectType::Generic);
1389        let result = dialect
1390            .transpile_to(sql, DialectType::BigQuery)
1391            .expect("Transpile failed");
1392        result[0].clone()
1393    }
1394
1395    #[test]
1396    fn test_ifnull_identity() {
1397        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1398        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1399        assert!(
1400            result.contains("COALESCE"),
1401            "Expected COALESCE, got: {}",
1402            result
1403        );
1404    }
1405
1406    #[test]
1407    fn test_nvl_to_ifnull() {
1408        // NVL is converted to IFNULL in BigQuery
1409        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1410        assert!(
1411            result.contains("IFNULL"),
1412            "Expected IFNULL, got: {}",
1413            result
1414        );
1415    }
1416
1417    #[test]
1418    fn test_try_cast_to_safe_cast() {
1419        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1420        assert!(
1421            result.contains("SAFE_CAST"),
1422            "Expected SAFE_CAST, got: {}",
1423            result
1424        );
1425    }
1426
1427    #[test]
1428    fn test_random_to_rand() {
1429        let result = transpile_to_bigquery("SELECT RANDOM()");
1430        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1431    }
1432
1433    #[test]
1434    fn test_basic_select() {
1435        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1436        assert!(result.contains("SELECT"));
1437        assert!(result.contains("FROM users"));
1438    }
1439
1440    #[test]
1441    fn test_group_concat_to_string_agg() {
1442        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1443        assert!(
1444            result.contains("STRING_AGG"),
1445            "Expected STRING_AGG, got: {}",
1446            result
1447        );
1448    }
1449
1450    #[test]
1451    fn test_generate_series_to_generate_array() {
1452        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1453        assert!(
1454            result.contains("GENERATE_ARRAY"),
1455            "Expected GENERATE_ARRAY, got: {}",
1456            result
1457        );
1458    }
1459
1460    #[test]
1461    fn test_backtick_identifiers() {
1462        // BigQuery uses backticks for identifiers
1463        let dialect = BigQueryDialect;
1464        let config = dialect.generator_config();
1465        assert_eq!(config.identifier_quote, '`');
1466    }
1467
1468    fn bigquery_identity(sql: &str, expected: &str) {
1469        let dialect = Dialect::get(DialectType::BigQuery);
1470        let ast = dialect.parse(sql).expect("Parse failed");
1471        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1472        let result = dialect.generate(&transformed).expect("Generate failed");
1473        assert_eq!(result, expected, "SQL: {}", sql);
1474    }
1475
1476    #[test]
1477    fn test_cast_char_to_string() {
1478        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1479    }
1480
1481    #[test]
1482    fn test_cast_varchar_to_string() {
1483        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1484    }
1485
1486    #[test]
1487    fn test_cast_nchar_to_string() {
1488        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1489    }
1490
1491    #[test]
1492    fn test_cast_nvarchar_to_string() {
1493        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1494    }
1495
1496    #[test]
1497    fn test_cast_timestamptz_to_timestamp() {
1498        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1499    }
1500
1501    #[test]
1502    fn test_cast_record_to_struct() {
1503        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1504    }
1505
1506    #[test]
1507    fn test_json_literal_to_parse_json() {
1508        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1509        bigquery_identity(
1510            "SELECT JSON '\"foo\"' AS json_data",
1511            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1512        );
1513    }
1514
1515    #[test]
1516    fn test_grant_as_alias_not_quoted() {
1517        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1518        bigquery_identity(
1519            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1520            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1521        );
1522    }
1523
1524    #[test]
1525    fn test_timestamp_literal_to_cast() {
1526        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1527        bigquery_identity(
1528            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1529            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1530        );
1531    }
1532
1533    #[test]
1534    fn test_date_literal_to_cast_in_extract() {
1535        // Issue 1: DATE literal should become CAST syntax in BigQuery
1536        bigquery_identity(
1537            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1538            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1539        );
1540    }
1541
1542    #[test]
1543    fn test_json_object_with_json_literals() {
1544        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1545        bigquery_identity(
1546            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1547            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1548        );
1549    }
1550
1551    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1552    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1553    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1554
1555    #[test]
1556    fn test_safe_parse_date_format_normalization() {
1557        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1558        bigquery_identity(
1559            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1560            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1561        );
1562    }
1563
1564    #[test]
1565    fn test_safe_parse_datetime_format_normalization() {
1566        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1567        bigquery_identity(
1568            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1569            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1570        );
1571    }
1572
1573    #[test]
1574    fn test_safe_parse_timestamp_format_normalization() {
1575        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1576        bigquery_identity(
1577            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1578            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1579        );
1580    }
1581
1582    #[test]
1583    fn test_datetime_literal_to_cast() {
1584        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1585        bigquery_identity(
1586            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1587            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1588        );
1589    }
1590
1591    #[test]
1592    fn test_last_day_week_modifier_stripped() {
1593        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1594        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1595    }
1596}