Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody, Identifier,
16    JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc, UnnestFunc, VarArgFunc,
17    Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        config
52    }
53
54    fn generator_config(&self) -> GeneratorConfig {
55        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
56        GeneratorConfig {
57            identifier_quote: '`',
58            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
59            dialect: Some(DialectType::BigQuery),
60            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
61            normalize_functions: NormalizeFunctions::None,
62            // BigQuery-specific settings from Python sqlglot
63            interval_allows_plural_form: false,
64            join_hints: false,
65            query_hints: false,
66            table_hints: false,
67            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
68            rename_table_with_db: false,
69            nvl2_supported: false,
70            unnest_with_ordinality: false,
71            collate_is_func: true,
72            limit_only_literals: true,
73            supports_table_alias_columns: false,
74            unpivot_aliases_are_identifiers: false,
75            json_key_value_pair_sep: ",",
76            null_ordering_supported: false,
77            ignore_nulls_in_func: true,
78            json_path_single_quote_escape: true,
79            can_implement_array_any: true,
80            supports_to_number: false,
81            named_placeholder_token: "@",
82            hex_func: "TO_HEX",
83            with_properties_prefix: "OPTIONS",
84            supports_exploding_projections: false,
85            except_intersect_support_all_clause: false,
86            supports_unix_seconds: true,
87            // BigQuery uses SAFE_ prefix for safe operations
88            try_supported: true,
89            // BigQuery does not support SEMI/ANTI JOIN syntax
90            semi_anti_join_with_side: false,
91            ..Default::default()
92        }
93    }
94
95    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
96        match expr {
97            // ===== Data Type Mappings =====
98            Expression::DataType(dt) => self.transform_data_type(dt),
99
100            // ===== Null handling =====
101            // IFNULL is native to BigQuery - keep as-is for identity
102            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
103
104            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
105            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
106
107            // Coalesce stays as Coalesce
108            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
109
110            // ===== String aggregation =====
111            // GROUP_CONCAT -> STRING_AGG in BigQuery
112            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
113                this: f.this,
114                separator: f.separator,
115                order_by: f.order_by,
116                distinct: f.distinct,
117                filter: f.filter,
118                limit: None,
119            }))),
120
121            // ===== Cast operations =====
122            // Cast data types are transformed by transform_recursive in mod.rs
123            // which calls transform_data_type via transform_expr(Expression::DataType(...))
124
125            // TryCast -> SafeCast in BigQuery with type transformation
126            Expression::TryCast(c) => {
127                let transformed_type = match self.transform_data_type(c.to)? {
128                    Expression::DataType(dt) => dt,
129                    _ => return Err(crate::error::Error::Parse("Expected DataType".to_string())),
130                };
131                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
132                    this: c.this,
133                    to: transformed_type,
134                    trailing_comments: c.trailing_comments,
135                    double_colon_syntax: c.double_colon_syntax,
136                    format: c.format,
137                    default: c.default,
138                })))
139            }
140
141            // ===== Pattern matching =====
142            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
143            Expression::ILike(op) => {
144                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
145                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
146                Ok(Expression::Like(Box::new(LikeOp {
147                    left: lower_left,
148                    right: lower_right,
149                    escape: op.escape,
150                    quantifier: op.quantifier,
151                })))
152            }
153
154            // RegexpLike -> REGEXP_CONTAINS in BigQuery
155            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
156                "REGEXP_CONTAINS".to_string(),
157                vec![f.this, f.pattern],
158            )))),
159
160            // ===== Array operations =====
161            // EXPLODE -> UNNEST in BigQuery
162            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
163                crate::expressions::UnnestFunc {
164                    this: f.this,
165                    expressions: Vec::new(),
166                    with_ordinality: false,
167                    alias: None,
168                    offset_alias: None,
169                },
170            ))),
171
172            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
173            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
174                crate::expressions::UnnestFunc {
175                    this: f.this,
176                    expressions: Vec::new(),
177                    with_ordinality: false,
178                    alias: None,
179                    offset_alias: None,
180                },
181            ))),
182
183            // GenerateSeries -> GENERATE_ARRAY in BigQuery
184            Expression::GenerateSeries(f) => {
185                let mut args = Vec::new();
186                if let Some(start) = f.start {
187                    args.push(*start);
188                }
189                if let Some(end) = f.end {
190                    args.push(*end);
191                }
192                if let Some(step) = f.step {
193                    args.push(*step);
194                }
195                Ok(Expression::Function(Box::new(Function::new(
196                    "GENERATE_ARRAY".to_string(),
197                    args,
198                ))))
199            }
200
201            // ===== Bitwise operations =====
202            // BitwiseAndAgg -> BIT_AND
203            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
204                "BIT_AND".to_string(),
205                vec![f.this],
206            )))),
207
208            // BitwiseOrAgg -> BIT_OR
209            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
210                "BIT_OR".to_string(),
211                vec![f.this],
212            )))),
213
214            // BitwiseXorAgg -> BIT_XOR
215            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
216                "BIT_XOR".to_string(),
217                vec![f.this],
218            )))),
219
220            // BitwiseCount -> BIT_COUNT
221            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
222                "BIT_COUNT".to_string(),
223                vec![f.this],
224            )))),
225
226            // ByteLength -> BYTE_LENGTH
227            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
228                "BYTE_LENGTH".to_string(),
229                vec![f.this],
230            )))),
231
232            // IntDiv -> DIV
233            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
234                "DIV".to_string(),
235                vec![f.this, f.expression],
236            )))),
237
238            // Int64 -> INT64
239            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
240                "INT64".to_string(),
241                vec![f.this],
242            )))),
243
244            // ===== Random =====
245            // RANDOM -> RAND in BigQuery
246            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
247                seed: None, lower: None, upper: None,
248            }))),
249
250            // ===== UUID =====
251            // Uuid -> GENERATE_UUID in BigQuery
252            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
253                "GENERATE_UUID".to_string(),
254                vec![],
255            )))),
256
257            // ===== Approximate functions =====
258            // ApproxDistinct -> APPROX_COUNT_DISTINCT
259            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
260                "APPROX_COUNT_DISTINCT".to_string(),
261                vec![f.this],
262            )))),
263
264            // ArgMax -> MAX_BY in BigQuery
265            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
266                "MAX_BY".to_string(),
267                vec![*f.this, *f.expression],
268            )))),
269
270            // ArgMin -> MIN_BY in BigQuery
271            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
272                "MIN_BY".to_string(),
273                vec![*f.this, *f.expression],
274            )))),
275
276            // ===== Conditional =====
277            // CountIf -> COUNTIF in BigQuery
278            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
279                "COUNTIF".to_string(),
280                vec![f.this],
281            )))),
282
283            // ===== String functions =====
284            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
285            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
286
287            // ===== Conversion =====
288            // Unhex -> FROM_HEX
289            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
290                "FROM_HEX".to_string(),
291                vec![*f.this],
292            )))),
293
294            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
295            Expression::UnixToTime(f) => {
296                let scale = f.scale.unwrap_or(0);
297                match scale {
298                    0 => Ok(Expression::Function(Box::new(Function::new(
299                        "TIMESTAMP_SECONDS".to_string(),
300                        vec![*f.this],
301                    )))),
302                    3 => Ok(Expression::Function(Box::new(Function::new(
303                        "TIMESTAMP_MILLIS".to_string(),
304                        vec![*f.this],
305                    )))),
306                    6 => Ok(Expression::Function(Box::new(Function::new(
307                        "TIMESTAMP_MICROS".to_string(),
308                        vec![*f.this],
309                    )))),
310                    _ => {
311                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
312                        let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
313                            *f.this,
314                            Expression::Function(Box::new(Function::new(
315                                "POWER".to_string(),
316                                vec![Expression::number(10), Expression::number(scale)],
317                            ))),
318                        )));
319                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
320                            this: div_expr,
321                            to: crate::expressions::DataType::Custom { name: "INT64".to_string() },
322                            double_colon_syntax: false,
323                            trailing_comments: vec![],
324                            format: None,
325                            default: None,
326                        }));
327                        Ok(Expression::Function(Box::new(Function::new(
328                            "TIMESTAMP_SECONDS".to_string(),
329                            vec![cast_expr],
330                        ))))
331                    }
332                }
333            }
334
335            // ===== Date/time =====
336            // DateDiff -> DATE_DIFF in BigQuery
337            Expression::DateDiff(f) => {
338                // BigQuery: DATE_DIFF(date1, date2, part)
339                let unit_str = match f.unit {
340                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
341                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
342                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
343                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
344                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
345                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
346                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
347                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
348                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
349                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
350                    None => "DAY",
351                };
352                let unit = Expression::Identifier(crate::expressions::Identifier {
353                    name: unit_str.to_string(),
354                    quoted: false,
355                    trailing_comments: Vec::new(),
356                });
357                Ok(Expression::Function(Box::new(Function::new(
358                    "DATE_DIFF".to_string(),
359                    vec![f.this, f.expression, unit],
360                ))))
361            }
362
363            // ===== Variance =====
364            // VarPop -> VAR_POP
365            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
366                "VAR_POP".to_string(),
367                vec![f.this],
368            )))),
369
370            // ===== Hash functions =====
371            // SHA -> SHA1
372            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
373                "SHA1".to_string(),
374                vec![f.this],
375            )))),
376
377            // SHA1Digest -> SHA1
378            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
379                "SHA1".to_string(),
380                vec![f.this],
381            )))),
382
383            // MD5Digest -> MD5
384            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
385                "MD5".to_string(),
386                vec![*f.this],
387            )))),
388
389            // ===== Type conversion =====
390            // JSONBool -> BOOL
391            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
392                "BOOL".to_string(),
393                vec![f.this],
394            )))),
395
396            // StringFunc -> STRING
397            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
398                "STRING".to_string(),
399                vec![*f.this],
400            )))),
401
402            // ===== Date/time from parts =====
403            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
404            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
405                "DATE_FROM_UNIX_DATE".to_string(),
406                vec![f.this],
407            )))),
408
409            // UnixDate -> UNIX_DATE
410            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
411                "UNIX_DATE".to_string(),
412                vec![f.this],
413            )))),
414
415            // TimestampDiff -> TIMESTAMP_DIFF
416            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
417                "TIMESTAMP_DIFF".to_string(),
418                vec![*f.this, *f.expression],
419            )))),
420
421            // FromTimeZone -> DATETIME
422            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
423                "DATETIME".to_string(),
424                vec![*f.this],
425            )))),
426
427            // TsOrDsToDatetime -> DATETIME
428            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
429                "DATETIME".to_string(),
430                vec![f.this],
431            )))),
432
433            // TsOrDsToTimestamp -> TIMESTAMP
434            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
435                "TIMESTAMP".to_string(),
436                vec![f.this],
437            )))),
438
439            // ===== IfFunc -> IF in BigQuery =====
440            Expression::IfFunc(f) => {
441                let mut args = vec![f.condition, f.true_value];
442                if let Some(false_val) = f.false_value {
443                    args.push(false_val);
444                } else {
445                    args.push(Expression::Null(crate::expressions::Null));
446                }
447                Ok(Expression::Function(Box::new(Function::new(
448                    "IF".to_string(),
449                    args,
450                ))))
451            }
452
453            // ===== HexString -> FROM_HEX =====
454            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
455                "FROM_HEX".to_string(),
456                vec![*f.this],
457            )))),
458
459            // ===== Additional auto-generated transforms from Python sqlglot =====
460            // ApproxTopK -> APPROX_TOP_COUNT
461            Expression::ApproxTopK(f) => {
462                let mut args = vec![*f.this];
463                if let Some(expr) = f.expression {
464                    args.push(*expr);
465                }
466                Ok(Expression::Function(Box::new(Function::new(
467                    "APPROX_TOP_COUNT".to_string(),
468                    args,
469                ))))
470            }
471
472            // SafeDivide -> SAFE_DIVIDE
473            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
474                "SAFE_DIVIDE".to_string(),
475                vec![*f.this, *f.expression],
476            )))),
477
478            // JSONKeysAtDepth -> JSON_KEYS
479            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
480                "JSON_KEYS".to_string(),
481                vec![*f.this],
482            )))),
483
484            // JSONValueArray -> JSON_VALUE_ARRAY
485            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
486                "JSON_VALUE_ARRAY".to_string(),
487                vec![*f.this],
488            )))),
489
490            // DateFromParts -> DATE
491            Expression::DateFromParts(f) => {
492                let mut args = Vec::new();
493                if let Some(y) = f.year {
494                    args.push(*y);
495                }
496                if let Some(m) = f.month {
497                    args.push(*m);
498                }
499                if let Some(d) = f.day {
500                    args.push(*d);
501                }
502                Ok(Expression::Function(Box::new(Function::new(
503                    "DATE".to_string(),
504                    args,
505                ))))
506            }
507
508            // SPLIT: BigQuery defaults to comma separator when none provided
509            // SPLIT(foo) -> SPLIT(foo, ',')
510            Expression::Split(f) => {
511                // Check if delimiter is empty or a placeholder - add default comma
512                let delimiter = match &f.delimiter {
513                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
514                        Expression::Literal(Literal::String(",".to_string()))
515                    }
516                    _ => f.delimiter,
517                };
518                Ok(Expression::Split(Box::new(SplitFunc {
519                    this: f.this,
520                    delimiter,
521                })))
522            }
523
524            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
525            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
526            Expression::Cast(c) => {
527                use crate::expressions::DataType;
528                // Check if casting to JSON - use PARSE_JSON instead
529                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
530                // (parser creates Custom for type literals like JSON 'string')
531                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
532                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
533                if is_json {
534                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
535                }
536                let transformed_type = match self.transform_data_type(c.to)? {
537                    Expression::DataType(dt) => dt,
538                    _ => return Err(crate::error::Error::Parse("Expected DataType".to_string())),
539                };
540                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
541                    this: c.this,
542                    to: transformed_type,
543                    trailing_comments: c.trailing_comments,
544                    double_colon_syntax: c.double_colon_syntax,
545                    format: c.format,
546                    default: c.default,
547                })))
548            }
549
550            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
551            Expression::SafeCast(c) => {
552                let transformed_type = match self.transform_data_type(c.to)? {
553                    Expression::DataType(dt) => dt,
554                    _ => return Err(crate::error::Error::Parse("Expected DataType".to_string())),
555                };
556                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
557                    this: c.this,
558                    to: transformed_type,
559                    trailing_comments: c.trailing_comments,
560                    double_colon_syntax: c.double_colon_syntax,
561                    format: c.format,
562                    default: c.default,
563                })))
564            }
565
566            // ===== SELECT-level transforms =====
567            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
568            Expression::Select(mut select) => {
569                if select.group_by.is_some() && select.order_by.is_some() {
570                    // Build map: expression → alias name for aliased projections
571                    let aliases: Vec<(Expression, Identifier)> = select
572                        .expressions
573                        .iter()
574                        .filter_map(|e| {
575                            if let Expression::Alias(a) = e {
576                                Some((a.this.clone(), a.alias.clone()))
577                            } else {
578                                None
579                            }
580                        })
581                        .collect();
582
583                    if let Some(ref mut group_by) = select.group_by {
584                        for grouped in group_by.expressions.iter_mut() {
585                            // Skip numeric indices (already aliased)
586                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
587                                continue;
588                            }
589                            // Check if this GROUP BY expression matches a SELECT alias
590                            for (expr, alias_ident) in &aliases {
591                                if grouped == expr {
592                                    *grouped = Expression::Column(Column {
593                                        name: alias_ident.clone(),
594                                        table: None,
595                                        join_mark: false,
596                                        trailing_comments: Vec::new(),
597                                    });
598                                    break;
599                                }
600                            }
601                        }
602                    }
603                }
604                Ok(Expression::Select(select))
605            }
606
607            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
608            Expression::ArrayContains(f) => {
609                let array_expr = f.this;
610                let value_expr = f.expression;
611
612                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
613                let unnest = Expression::Unnest(Box::new(UnnestFunc {
614                    this: array_expr,
615                    expressions: Vec::new(),
616                    with_ordinality: false,
617                    alias: None,
618                    offset_alias: None,
619                }));
620                let aliased_unnest = Expression::Alias(Box::new(Alias {
621                    this: unnest,
622                    alias: Identifier::new("_col"),
623                    column_aliases: Vec::new(),
624                    pre_alias_comments: Vec::new(),
625                    trailing_comments: Vec::new(),
626                }));
627                let col_ref = Expression::Column(Column {
628                    name: Identifier::new("_col"),
629                    table: None,
630                    join_mark: false,
631                    trailing_comments: Vec::new(),
632                });
633                let where_clause = Where {
634                    this: Expression::Eq(Box::new(BinaryOp {
635                        left: col_ref,
636                        right: value_expr,
637                        left_comments: Vec::new(),
638                        operator_comments: Vec::new(),
639                        trailing_comments: Vec::new(),
640                    })),
641                };
642                let inner_select = Expression::Select(Box::new(Select {
643                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
644                    from: Some(From {
645                        expressions: vec![aliased_unnest],
646                    }),
647                    where_clause: Some(where_clause),
648                    ..Default::default()
649                }));
650                Ok(Expression::Exists(Box::new(Exists {
651                    this: inner_select,
652                    not: false,
653                })))
654            }
655
656            // ===== JSON_OBJECT array form → key-value pairs =====
657            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
658            Expression::JsonObject(mut f) => {
659                if f.pairs.len() == 1 {
660                    // Extract expressions from both Array and ArrayFunc variants
661                    let keys_exprs = match &f.pairs[0].0 {
662                        Expression::Array(arr) => Some(&arr.expressions),
663                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
664                        _ => None,
665                    };
666                    let vals_exprs = match &f.pairs[0].1 {
667                        Expression::Array(arr) => Some(&arr.expressions),
668                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
669                        _ => None,
670                    };
671                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
672                        if keys.len() == vals.len() {
673                            let new_pairs: Vec<(Expression, Expression)> = keys.iter()
674                                .zip(vals.iter())
675                                .map(|(k, v)| (k.clone(), v.clone()))
676                                .collect();
677                            f.pairs = new_pairs;
678                        }
679                    }
680                }
681                Ok(Expression::JsonObject(f))
682            }
683
684            // ===== MOD function: unwrap unnecessary Paren from first argument =====
685            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
686            Expression::ModFunc(mut f) => {
687                // Unwrap Paren from first argument if present
688                if let Expression::Paren(paren) = f.this {
689                    f.this = paren.this;
690                }
691                Ok(Expression::ModFunc(f))
692            }
693
694            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
695            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
696                let path = match *e.expression {
697                    Expression::Literal(Literal::String(s)) => {
698                        let normalized = if s.starts_with('$') {
699                            s
700                        } else if s.starts_with('[') {
701                            format!("${}", s)
702                        } else {
703                            format!("$.{}", s)
704                        };
705                        Expression::Literal(Literal::String(normalized))
706                    }
707                    other => other,
708                };
709                Ok(Expression::Function(Box::new(Function::new(
710                    "JSON_EXTRACT".to_string(),
711                    vec![*e.this, path],
712                ))))
713            }
714
715            // Generic function transformations
716            Expression::Function(f) => self.transform_function(*f),
717
718            // Generic aggregate function transformations
719            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
720
721            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
722            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
723            Expression::MethodCall(mc) => self.transform_method_call(*mc),
724
725            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
726            // and set is_table_function = true for TABLE FUNCTION syntax
727            Expression::CreateFunction(mut cf) => {
728                if let Some(ref mut rtb) = cf.returns_table_body {
729                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
730                        // Convert TABLE (...) to TABLE <...> with BigQuery types
731                        let inner = if rtb.starts_with("TABLE (") {
732                            &rtb["TABLE (".len()..rtb.len()-1]
733                        } else {
734                            &rtb["TABLE(".len()..rtb.len()-1]
735                        };
736                        // Convert common types to BigQuery equivalents
737                        let converted = inner
738                            .replace(" INT,", " INT64,")
739                            .replace(" INT)", " INT64)")
740                            .replace(" INTEGER,", " INT64,")
741                            .replace(" INTEGER)", " INT64)")
742                            .replace(" FLOAT,", " FLOAT64,")
743                            .replace(" FLOAT)", " FLOAT64)")
744                            .replace(" BOOLEAN,", " BOOL,")
745                            .replace(" BOOLEAN)", " BOOL)")
746                            .replace(" VARCHAR", " STRING")
747                            .replace(" TEXT", " STRING");
748                        // Handle trailing type (no comma, no paren)
749                        let converted = if converted.ends_with(" INT") {
750                            format!("{}{}", &converted[..converted.len()-4], " INT64")
751                        } else {
752                            converted
753                        };
754                        *rtb = format!("TABLE <{}>", converted);
755                        cf.is_table_function = true;
756                    }
757                }
758                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
759                if cf.is_table_function {
760                    if let Some(ref body) = cf.body {
761                        if matches!(body, FunctionBody::StringLiteral(_)) {
762                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
763                                // Parse the SQL string into an expression
764                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
765                                    if let Some(stmt) = parsed.into_iter().next() {
766                                        cf.body = Some(FunctionBody::Expression(stmt));
767                                    } else {
768                                        cf.body = Some(FunctionBody::StringLiteral(sql));
769                                    }
770                                } else {
771                                    cf.body = Some(FunctionBody::StringLiteral(sql));
772                                }
773                            }
774                        }
775                    }
776                }
777                Ok(Expression::CreateFunction(cf))
778            }
779
780            // Pass through everything else
781            _ => Ok(expr),
782        }
783    }
784}
785
786impl BigQueryDialect {
787    /// Transform data types according to BigQuery TYPE_MAPPING
788    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
789        use crate::expressions::DataType;
790        let transformed = match dt {
791            // BIGINT -> INT64
792            DataType::BigInt { .. } => DataType::Custom {
793                name: "INT64".to_string(),
794            },
795            // INT -> INT64
796            DataType::Int { .. } => DataType::Custom {
797                name: "INT64".to_string(),
798            },
799            // SMALLINT -> INT64
800            DataType::SmallInt { .. } => DataType::Custom {
801                name: "INT64".to_string(),
802            },
803            // TINYINT -> INT64
804            DataType::TinyInt { .. } => DataType::Custom {
805                name: "INT64".to_string(),
806            },
807            // FLOAT -> FLOAT64
808            DataType::Float { .. } => DataType::Custom {
809                name: "FLOAT64".to_string(),
810            },
811            // DOUBLE -> FLOAT64
812            DataType::Double { .. } => DataType::Custom {
813                name: "FLOAT64".to_string(),
814            },
815            // BOOLEAN -> BOOL
816            DataType::Boolean => DataType::Custom {
817                name: "BOOL".to_string(),
818            },
819            // CHAR -> STRING
820            DataType::Char { .. } => DataType::Custom {
821                name: "STRING".to_string(),
822            },
823            // VARCHAR -> STRING
824            DataType::VarChar { .. } => DataType::Custom {
825                name: "STRING".to_string(),
826            },
827            // TEXT -> STRING
828            DataType::Text => DataType::Custom {
829                name: "STRING".to_string(),
830            },
831            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
832            DataType::String { .. } => DataType::Custom {
833                name: "STRING".to_string(),
834            },
835            // BINARY -> BYTES
836            DataType::Binary { .. } => DataType::Custom {
837                name: "BYTES".to_string(),
838            },
839            // VARBINARY -> BYTES
840            DataType::VarBinary { .. } => DataType::Custom {
841                name: "BYTES".to_string(),
842            },
843            // BLOB -> BYTES
844            DataType::Blob => DataType::Custom {
845                name: "BYTES".to_string(),
846            },
847            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
848            DataType::Decimal { .. } => DataType::Custom {
849                name: "NUMERIC".to_string(),
850            },
851            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
852            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
853            // be converted to DATETIME by the generator
854            DataType::Timestamp { timezone: false, .. } => DataType::Custom {
855                name: "TIMESTAMP".to_string(),
856            },
857            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
858                name: "TIMESTAMP".to_string(),
859            },
860            // UUID -> STRING (BigQuery doesn't have native UUID type)
861            DataType::Uuid => DataType::Custom {
862                name: "STRING".to_string(),
863            },
864            // RECORD -> STRUCT in BigQuery
865            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
866                DataType::Custom {
867                    name: "STRUCT".to_string(),
868                }
869            }
870            // TIMESTAMPTZ (custom) -> TIMESTAMP
871            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
872                DataType::Custom {
873                    name: "TIMESTAMP".to_string(),
874                }
875            }
876            // BYTEINT (custom) -> INT64
877            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
878                DataType::Custom {
879                    name: "INT64".to_string(),
880                }
881            }
882            // Keep all other types as-is
883            other => other,
884        };
885        Ok(Expression::DataType(transformed))
886    }
887
888    fn transform_function(&self, f: Function) -> Result<Expression> {
889        let name_upper = f.name.to_uppercase();
890        match name_upper.as_str() {
891            // IFNULL -> COALESCE (both work in BigQuery)
892            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
893                expressions: f.args,
894            }))),
895
896            // NVL -> COALESCE
897            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
898                expressions: f.args,
899            }))),
900
901            // ISNULL -> COALESCE
902            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
903                expressions: f.args,
904            }))),
905
906            // GROUP_CONCAT -> STRING_AGG in BigQuery
907            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
908                Function::new("STRING_AGG".to_string(), f.args),
909            ))),
910
911            // SUBSTR -> SUBSTRING (both work)
912            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
913                "SUBSTRING".to_string(),
914                f.args,
915            )))),
916
917            // RANDOM -> RAND
918            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
919                seed: None, lower: None, upper: None,
920            }))),
921
922            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
923            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
924            "CURRENT_DATE" if f.args.is_empty() => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
925            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
926                name: "CURRENT_DATE".to_string(),
927                args: f.args,
928                distinct: false,
929                trailing_comments: Vec::new(),
930                use_bracket_syntax: false,
931                no_parens: false,
932                quoted: false,
933            }))),
934
935            // NOW -> CURRENT_TIMESTAMP in BigQuery
936            "NOW" => Ok(Expression::CurrentTimestamp(
937                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
938            )),
939
940            // TO_DATE -> PARSE_DATE in BigQuery
941            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
942                "PARSE_DATE".to_string(),
943                f.args,
944            )))),
945
946            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
947            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
948                "PARSE_TIMESTAMP".to_string(),
949                f.args,
950            )))),
951
952            // TO_TIME -> TIME in BigQuery
953            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
954                "TIME".to_string(),
955                f.args,
956            )))),
957
958            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
959            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
960                "FORMAT_DATE".to_string(),
961                f.args,
962            )))),
963
964            // POSITION -> STRPOS in BigQuery
965            // BigQuery uses STRPOS(string, substring)
966            "POSITION" if f.args.len() == 2 => {
967                let mut args = f.args;
968                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
969                let first = args.remove(0);
970                let second = args.remove(0);
971                Ok(Expression::Function(Box::new(Function::new(
972                    "STRPOS".to_string(),
973                    vec![second, first],
974                ))))
975            }
976
977            // LEN -> LENGTH
978            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
979                f.args.into_iter().next().unwrap(),
980            )))),
981
982            // CEILING -> CEIL (both work)
983            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
984                this: f.args.into_iter().next().unwrap(),
985                decimals: None,
986                to: None,
987            }))),
988
989            // GETDATE -> CURRENT_TIMESTAMP
990            "GETDATE" => Ok(Expression::CurrentTimestamp(
991                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
992            )),
993
994            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
995            // CARDINALITY -> ARRAY_LENGTH
996            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
997                UnaryFunc::new(f.args.into_iter().next().unwrap()),
998            ))),
999
1000            // UNNEST is native to BigQuery
1001
1002            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1003            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1004                "GENERATE_ARRAY".to_string(),
1005                f.args,
1006            )))),
1007
1008            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1009            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1010            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1011                "APPROX_COUNT_DISTINCT".to_string(),
1012                f.args,
1013            )))),
1014
1015            // COUNT_IF -> COUNTIF in BigQuery
1016            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1017                "COUNTIF".to_string(),
1018                f.args,
1019            )))),
1020
1021            // SHA1 -> SHA1 (native), SHA -> SHA1
1022            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1023                "SHA1".to_string(),
1024                f.args,
1025            )))),
1026
1027            // SHA256/SHA2 -> SHA256
1028            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1029                "SHA256".to_string(),
1030                f.args,
1031            )))),
1032
1033            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1034            // TO_HEX(MD5(x)) pattern
1035            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1036                "MD5".to_string(),
1037                f.args,
1038            )))),
1039
1040            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1041            // VAR_POP -> VAR_POP (native)
1042
1043            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1044            "DATEADD" if f.args.len() == 3 => {
1045                let mut args = f.args;
1046                let unit_expr = args.remove(0);
1047                let amount = args.remove(0);
1048                let date = args.remove(0);
1049                // Convert unit identifier to IntervalUnit
1050                let unit_name = match &unit_expr {
1051                    Expression::Identifier(id) => id.name.to_uppercase(),
1052                    _ => "DAY".to_string(),
1053                };
1054                let unit = match unit_name.as_str() {
1055                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1056                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
1057                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1058                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1059                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => crate::expressions::IntervalUnit::Day,
1060                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1061                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1062                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1063                    "MILLISECOND" | "MILLISECONDS" | "MS" => crate::expressions::IntervalUnit::Millisecond,
1064                    "MICROSECOND" | "MICROSECONDS" | "US" => crate::expressions::IntervalUnit::Microsecond,
1065                    _ => crate::expressions::IntervalUnit::Day,
1066                };
1067                Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
1068                    this: date,
1069                    interval: amount,
1070                    unit,
1071                })))
1072            }
1073            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1074                "DATE_ADD".to_string(),
1075                f.args,
1076            )))),
1077
1078            // DATE_DIFF in BigQuery (native)
1079            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1080                "DATE_DIFF".to_string(),
1081                f.args,
1082            )))),
1083
1084            // TIMESTAMP_DIFF in BigQuery
1085            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1086                "TIMESTAMP_DIFF".to_string(),
1087                f.args,
1088            )))),
1089
1090            // TIME -> TIME (native)
1091            // DATETIME -> DATETIME (native)
1092
1093            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1094
1095            // NEWID/UUID -> GENERATE_UUID
1096            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1097                "GENERATE_UUID".to_string(),
1098                vec![],
1099            )))),
1100
1101            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1102            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1103                "EDIT_DISTANCE".to_string(),
1104                f.args,
1105            )))),
1106
1107            // UNIX_TIMESTAMP -> UNIX_SECONDS
1108            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1109                "UNIX_SECONDS".to_string(),
1110                f.args,
1111            )))),
1112
1113            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1114            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1115                "TIMESTAMP_SECONDS".to_string(),
1116                f.args,
1117            )))),
1118
1119            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1120            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1121                "LENGTH".to_string(),
1122                f.args,
1123            )))),
1124
1125            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1126            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1127                "BYTE_LENGTH".to_string(),
1128                f.args,
1129            )))),
1130
1131            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1132            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1133                "JSON_VALUE_ARRAY".to_string(),
1134                f.args,
1135            )))),
1136
1137            // INSTR is native to BigQuery
1138
1139            // SPLIT: BigQuery defaults to comma separator when none provided
1140            // SPLIT(foo) -> SPLIT(foo, ',')
1141            "SPLIT" if f.args.len() == 1 => {
1142                let mut args = f.args;
1143                args.push(Expression::Literal(Literal::String(",".to_string())));
1144                Ok(Expression::Split(Box::new(SplitFunc {
1145                    this: args.remove(0),
1146                    delimiter: args.remove(0),
1147                })))
1148            }
1149
1150            // SPLIT with two args - convert to Split expression
1151            "SPLIT" if f.args.len() == 2 => {
1152                let mut args = f.args;
1153                Ok(Expression::Split(Box::new(SplitFunc {
1154                    this: args.remove(0),
1155                    delimiter: args.remove(0),
1156                })))
1157            }
1158
1159            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1160            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1161                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1162                let args = if f.args.len() > 4 {
1163                    f.args[..4].to_vec()
1164                } else {
1165                    f.args
1166                };
1167                Ok(Expression::Function(Box::new(Function::new(
1168                    "REGEXP_EXTRACT".to_string(),
1169                    args,
1170                ))))
1171            }
1172            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1173                "REGEXP_EXTRACT".to_string(),
1174                f.args,
1175            )))),
1176
1177            // REGEXP_REPLACE - strip extra Snowflake-specific args
1178            "REGEXP_REPLACE" if f.args.len() > 3 => {
1179                let args = f.args[..3].to_vec();
1180                Ok(Expression::Function(Box::new(Function::new("REGEXP_REPLACE".to_string(), args))))
1181            }
1182
1183            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1184            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1185                "JSON_OBJECT".to_string(),
1186                f.args,
1187            )))),
1188
1189            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1190            "EDITDISTANCE" if f.args.len() == 3 => {
1191                let col1 = f.args[0].clone();
1192                let col2 = f.args[1].clone();
1193                let max_dist = f.args[2].clone();
1194                Ok(Expression::Function(Box::new(Function::new(
1195                    "EDIT_DISTANCE".to_string(),
1196                    vec![col1, col2, Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1197                        name: crate::expressions::Identifier::new("max_distance".to_string()),
1198                        value: max_dist,
1199                        separator: crate::expressions::NamedArgSeparator::DArrow,
1200                    }))],
1201                ))))
1202            }
1203            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
1204                "EDIT_DISTANCE".to_string(),
1205                f.args,
1206            )))),
1207
1208            // HEX_DECODE_BINARY -> FROM_HEX
1209            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1210                "FROM_HEX".to_string(),
1211                f.args,
1212            )))),
1213
1214            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1215            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1216            "PARSE_DATE" | "PARSE_DATETIME" | "PARSE_TIMESTAMP"
1217            | "SAFE.PARSE_DATE" | "SAFE.PARSE_DATETIME" | "SAFE.PARSE_TIMESTAMP" => {
1218                let args = self.normalize_time_format_args(f.args);
1219                Ok(Expression::Function(Box::new(Function {
1220                    name: f.name,
1221                    args,
1222                    distinct: f.distinct,
1223                    no_parens: f.no_parens,
1224                    trailing_comments: f.trailing_comments,
1225                    quoted: f.quoted,
1226                    use_bracket_syntax: f.use_bracket_syntax,
1227                })))
1228            }
1229
1230            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1231            "GET_PATH" if f.args.len() == 2 => {
1232                let mut args = f.args;
1233                let this = args.remove(0);
1234                let path = args.remove(0);
1235                let json_path = match &path {
1236                    Expression::Literal(Literal::String(s)) => {
1237                        let normalized = if s.starts_with('$') {
1238                            s.clone()
1239                        } else if s.starts_with('[') {
1240                            format!("${}", s)
1241                        } else {
1242                            format!("$.{}", s)
1243                        };
1244                        Expression::Literal(Literal::String(normalized))
1245                    }
1246                    _ => path,
1247                };
1248                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1249                    this,
1250                    path: json_path,
1251                    returning: None,
1252                    arrow_syntax: false,
1253                    hash_arrow_syntax: false,
1254                    wrapper_option: None,
1255                    quotes_option: None,
1256                    on_scalar_string: false,
1257                    on_error: None,
1258                })))
1259            }
1260
1261            // Pass through everything else
1262            _ => Ok(Expression::Function(Box::new(f))),
1263        }
1264    }
1265
1266    fn transform_aggregate_function(
1267        &self,
1268        f: Box<crate::expressions::AggregateFunction>,
1269    ) -> Result<Expression> {
1270        let name_upper = f.name.to_uppercase();
1271        match name_upper.as_str() {
1272            // GROUP_CONCAT -> STRING_AGG
1273            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1274                Function::new("STRING_AGG".to_string(), f.args),
1275            ))),
1276
1277            // Pass through everything else
1278            _ => Ok(Expression::AggregateFunction(f)),
1279        }
1280    }
1281
1282    /// Transform MethodCall expressions
1283    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1284    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1285        use crate::expressions::MethodCall;
1286
1287        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1288        if let Expression::Column(ref col) = mc.this {
1289            if col.name.name.eq_ignore_ascii_case("SAFE") {
1290                let method_upper = mc.method.name.to_uppercase();
1291                if method_upper == "PARSE_DATE"
1292                    || method_upper == "PARSE_DATETIME"
1293                    || method_upper == "PARSE_TIMESTAMP"
1294                {
1295                    // Normalize the format string in the first argument
1296                    let args = self.normalize_time_format_args(mc.args);
1297                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1298                        this: mc.this,
1299                        method: mc.method,
1300                        args,
1301                    })));
1302                }
1303            }
1304        }
1305
1306        // Pass through all other method calls
1307        Ok(Expression::MethodCall(Box::new(mc)))
1308    }
1309
1310    /// Normalize time format strings in function arguments
1311    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1312    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1313        args.into_iter()
1314            .enumerate()
1315            .map(|(i, arg)| {
1316                // Only transform the first argument (the format string)
1317                if i == 0 {
1318                    if let Expression::Literal(Literal::String(s)) = arg {
1319                        let normalized = self.normalize_time_format(&s);
1320                        return Expression::Literal(Literal::String(normalized));
1321                    }
1322                }
1323                arg
1324            })
1325            .collect()
1326    }
1327
1328    /// Normalize a time format string according to BigQuery conventions
1329    /// %Y-%m-%d -> %F (ISO date)
1330    /// %H:%M:%S -> %T (time)
1331    fn normalize_time_format(&self, format: &str) -> String {
1332        format
1333            .replace("%Y-%m-%d", "%F")
1334            .replace("%H:%M:%S", "%T")
1335    }
1336}
1337
1338#[cfg(test)]
1339mod tests {
1340    use super::*;
1341    use crate::dialects::Dialect;
1342
1343    fn transpile_to_bigquery(sql: &str) -> String {
1344        let dialect = Dialect::get(DialectType::Generic);
1345        let result = dialect
1346            .transpile_to(sql, DialectType::BigQuery)
1347            .expect("Transpile failed");
1348        result[0].clone()
1349    }
1350
1351    #[test]
1352    fn test_ifnull_identity() {
1353        // BigQuery supports IFNULL natively, so it should be preserved
1354        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1355        assert!(
1356            result.contains("IFNULL"),
1357            "Expected IFNULL, got: {}",
1358            result
1359        );
1360    }
1361
1362    #[test]
1363    fn test_nvl_to_ifnull() {
1364        // NVL is converted to IFNULL in BigQuery
1365        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1366        assert!(
1367            result.contains("IFNULL"),
1368            "Expected IFNULL, got: {}",
1369            result
1370        );
1371    }
1372
1373    #[test]
1374    fn test_try_cast_to_safe_cast() {
1375        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1376        assert!(
1377            result.contains("SAFE_CAST"),
1378            "Expected SAFE_CAST, got: {}",
1379            result
1380        );
1381    }
1382
1383    #[test]
1384    fn test_random_to_rand() {
1385        let result = transpile_to_bigquery("SELECT RANDOM()");
1386        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1387    }
1388
1389    #[test]
1390    fn test_basic_select() {
1391        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1392        assert!(result.contains("SELECT"));
1393        assert!(result.contains("FROM users"));
1394    }
1395
1396    #[test]
1397    fn test_group_concat_to_string_agg() {
1398        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1399        assert!(
1400            result.contains("STRING_AGG"),
1401            "Expected STRING_AGG, got: {}",
1402            result
1403        );
1404    }
1405
1406    #[test]
1407    fn test_generate_series_to_generate_array() {
1408        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1409        assert!(
1410            result.contains("GENERATE_ARRAY"),
1411            "Expected GENERATE_ARRAY, got: {}",
1412            result
1413        );
1414    }
1415
1416    #[test]
1417    fn test_backtick_identifiers() {
1418        // BigQuery uses backticks for identifiers
1419        let dialect = BigQueryDialect;
1420        let config = dialect.generator_config();
1421        assert_eq!(config.identifier_quote, '`');
1422    }
1423
1424    fn bigquery_identity(sql: &str, expected: &str) {
1425        let dialect = Dialect::get(DialectType::BigQuery);
1426        let ast = dialect.parse(sql).expect("Parse failed");
1427        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1428        let result = dialect.generate(&transformed).expect("Generate failed");
1429        assert_eq!(result, expected, "SQL: {}", sql);
1430    }
1431
1432    #[test]
1433    fn test_cast_char_to_string() {
1434        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1435    }
1436
1437    #[test]
1438    fn test_cast_varchar_to_string() {
1439        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1440    }
1441
1442    #[test]
1443    fn test_cast_nchar_to_string() {
1444        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1445    }
1446
1447    #[test]
1448    fn test_cast_nvarchar_to_string() {
1449        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1450    }
1451
1452    #[test]
1453    fn test_cast_timestamptz_to_timestamp() {
1454        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1455    }
1456
1457    #[test]
1458    fn test_cast_record_to_struct() {
1459        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1460    }
1461
1462    #[test]
1463    fn test_json_literal_to_parse_json() {
1464        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1465        bigquery_identity("SELECT JSON '\"foo\"' AS json_data", "SELECT PARSE_JSON('\"foo\"') AS json_data");
1466    }
1467
1468    #[test]
1469    fn test_grant_as_alias_not_quoted() {
1470        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1471        bigquery_identity(
1472            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1473            "SELECT GRANT FROM (SELECT 'input' AS GRANT)"
1474        );
1475    }
1476
1477    #[test]
1478    fn test_timestamp_literal_to_cast() {
1479        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1480        bigquery_identity(
1481            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1482            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1483        );
1484    }
1485
1486    #[test]
1487    fn test_date_literal_to_cast_in_extract() {
1488        // Issue 1: DATE literal should become CAST syntax in BigQuery
1489        bigquery_identity(
1490            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1491            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))"
1492        );
1493    }
1494
1495    #[test]
1496    fn test_json_object_with_json_literals() {
1497        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1498        bigquery_identity(
1499            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1500            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data"
1501        );
1502    }
1503
1504    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1505    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1506    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1507
1508    #[test]
1509    fn test_safe_parse_date_format_normalization() {
1510        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1511        bigquery_identity(
1512            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1513            "SAFE.PARSE_DATE('%F', '2024-01-15')"
1514        );
1515    }
1516
1517    #[test]
1518    fn test_safe_parse_datetime_format_normalization() {
1519        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1520        bigquery_identity(
1521            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1522            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')"
1523        );
1524    }
1525
1526    #[test]
1527    fn test_safe_parse_timestamp_format_normalization() {
1528        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1529        bigquery_identity(
1530            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1531            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')"
1532        );
1533    }
1534
1535    #[test]
1536    fn test_datetime_literal_to_cast() {
1537        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1538        bigquery_identity(
1539            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1540            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)"
1541        );
1542    }
1543
1544    #[test]
1545    fn test_last_day_week_modifier_stripped() {
1546        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1547        bigquery_identity(
1548            "LAST_DAY(col, WEEK(MONDAY))",
1549            "LAST_DAY(col, WEEK)"
1550        );
1551    }
1552
1553}