Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
22/// BigQuery dialect
23pub struct BigQueryDialect;
24
25impl DialectImpl for BigQueryDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::BigQuery
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // BigQuery uses backticks for identifiers, NOT double quotes
33        // Remove double quote from identifiers (it's in the default config)
34        config.identifiers.remove(&'"');
35        config.identifiers.insert('`', '`');
36        // BigQuery supports double quotes for strings (in addition to single quotes)
37        config.quotes.insert("\"".to_string(), "\"".to_string());
38        // BigQuery supports triple-quoted strings
39        config.quotes.insert("'''".to_string(), "'''".to_string());
40        config
41            .quotes
42            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
43        // BigQuery supports backslash escaping in strings
44        config.string_escapes = vec!['\'', '\\'];
45        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
46        config.b_prefix_is_byte_string = true;
47        // BigQuery supports hex number strings like 0xA, 0xFF
48        config.hex_number_strings = true;
49        // BigQuery: 0xA represents integer 10 (not binary/blob)
50        config.hex_string_is_integer_type = true;
51        config
52    }
53
54    fn generator_config(&self) -> GeneratorConfig {
55        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
56        GeneratorConfig {
57            identifier_quote: '`',
58            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
59            dialect: Some(DialectType::BigQuery),
60            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
61            normalize_functions: NormalizeFunctions::None,
62            // BigQuery-specific settings from Python sqlglot
63            interval_allows_plural_form: false,
64            join_hints: false,
65            query_hints: false,
66            table_hints: false,
67            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
68            rename_table_with_db: false,
69            nvl2_supported: false,
70            unnest_with_ordinality: false,
71            collate_is_func: true,
72            limit_only_literals: true,
73            supports_table_alias_columns: false,
74            unpivot_aliases_are_identifiers: false,
75            json_key_value_pair_sep: ",",
76            null_ordering_supported: false,
77            ignore_nulls_in_func: true,
78            json_path_single_quote_escape: true,
79            can_implement_array_any: true,
80            supports_to_number: false,
81            named_placeholder_token: "@",
82            hex_func: "TO_HEX",
83            with_properties_prefix: "OPTIONS",
84            supports_exploding_projections: false,
85            except_intersect_support_all_clause: false,
86            supports_unix_seconds: true,
87            // BigQuery uses SAFE_ prefix for safe operations
88            try_supported: true,
89            // BigQuery does not support SEMI/ANTI JOIN syntax
90            semi_anti_join_with_side: false,
91            ..Default::default()
92        }
93    }
94
95    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
96        match expr {
97            // ===== Data Type Mappings =====
98            Expression::DataType(dt) => self.transform_data_type(dt),
99
100            // ===== Null handling =====
101            // IFNULL is native to BigQuery - keep as-is for identity
102            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
103
104            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
105            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
106
107            // Coalesce stays as Coalesce
108            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
109
110            // ===== String aggregation =====
111            // GROUP_CONCAT -> STRING_AGG in BigQuery
112            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
113                this: f.this,
114                separator: f.separator,
115                order_by: f.order_by,
116                distinct: f.distinct,
117                filter: f.filter,
118                limit: None,
119            }))),
120
121            // ===== Cast operations =====
122            // Cast data types are transformed by transform_recursive in mod.rs
123            // which calls transform_data_type via transform_expr(Expression::DataType(...))
124
125            // TryCast -> SafeCast in BigQuery with type transformation
126            Expression::TryCast(c) => {
127                let transformed_type = match self.transform_data_type(c.to)? {
128                    Expression::DataType(dt) => dt,
129                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
130                };
131                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
132                    this: c.this,
133                    to: transformed_type,
134                    trailing_comments: c.trailing_comments,
135                    double_colon_syntax: c.double_colon_syntax,
136                    format: c.format,
137                    default: c.default,
138                })))
139            }
140
141            // ===== Pattern matching =====
142            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
143            Expression::ILike(op) => {
144                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
145                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
146                Ok(Expression::Like(Box::new(LikeOp {
147                    left: lower_left,
148                    right: lower_right,
149                    escape: op.escape,
150                    quantifier: op.quantifier,
151                })))
152            }
153
154            // RegexpLike -> REGEXP_CONTAINS in BigQuery
155            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
156                "REGEXP_CONTAINS".to_string(),
157                vec![f.this, f.pattern],
158            )))),
159
160            // ===== Array operations =====
161            // EXPLODE -> UNNEST in BigQuery
162            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
163                crate::expressions::UnnestFunc {
164                    this: f.this,
165                    expressions: Vec::new(),
166                    with_ordinality: false,
167                    alias: None,
168                    offset_alias: None,
169                },
170            ))),
171
172            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
173            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
174                crate::expressions::UnnestFunc {
175                    this: f.this,
176                    expressions: Vec::new(),
177                    with_ordinality: false,
178                    alias: None,
179                    offset_alias: None,
180                },
181            ))),
182
183            // GenerateSeries -> GENERATE_ARRAY in BigQuery
184            Expression::GenerateSeries(f) => {
185                let mut args = Vec::new();
186                if let Some(start) = f.start {
187                    args.push(*start);
188                }
189                if let Some(end) = f.end {
190                    args.push(*end);
191                }
192                if let Some(step) = f.step {
193                    args.push(*step);
194                }
195                Ok(Expression::Function(Box::new(Function::new(
196                    "GENERATE_ARRAY".to_string(),
197                    args,
198                ))))
199            }
200
201            // ===== Bitwise operations =====
202            // BitwiseAndAgg -> BIT_AND
203            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
204                "BIT_AND".to_string(),
205                vec![f.this],
206            )))),
207
208            // BitwiseOrAgg -> BIT_OR
209            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
210                "BIT_OR".to_string(),
211                vec![f.this],
212            )))),
213
214            // BitwiseXorAgg -> BIT_XOR
215            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
216                "BIT_XOR".to_string(),
217                vec![f.this],
218            )))),
219
220            // BitwiseCount -> BIT_COUNT
221            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
222                "BIT_COUNT".to_string(),
223                vec![f.this],
224            )))),
225
226            // ByteLength -> BYTE_LENGTH
227            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
228                "BYTE_LENGTH".to_string(),
229                vec![f.this],
230            )))),
231
232            // IntDiv -> DIV
233            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
234                "DIV".to_string(),
235                vec![f.this, f.expression],
236            )))),
237
238            // Int64 -> INT64
239            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
240                "INT64".to_string(),
241                vec![f.this],
242            )))),
243
244            // ===== Random =====
245            // RANDOM -> RAND in BigQuery
246            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
247                seed: None,
248                lower: None,
249                upper: None,
250            }))),
251
252            // ===== UUID =====
253            // Uuid -> GENERATE_UUID in BigQuery
254            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
255                "GENERATE_UUID".to_string(),
256                vec![],
257            )))),
258
259            // ===== Approximate functions =====
260            // ApproxDistinct -> APPROX_COUNT_DISTINCT
261            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
262                "APPROX_COUNT_DISTINCT".to_string(),
263                vec![f.this],
264            )))),
265
266            // ArgMax -> MAX_BY in BigQuery
267            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
268                "MAX_BY".to_string(),
269                vec![*f.this, *f.expression],
270            )))),
271
272            // ArgMin -> MIN_BY in BigQuery
273            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
274                "MIN_BY".to_string(),
275                vec![*f.this, *f.expression],
276            )))),
277
278            // ===== Conditional =====
279            // CountIf -> COUNTIF in BigQuery
280            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
281                "COUNTIF".to_string(),
282                vec![f.this],
283            )))),
284
285            // ===== String functions =====
286            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
287            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
288
289            // ===== Conversion =====
290            // Unhex -> FROM_HEX
291            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
292                "FROM_HEX".to_string(),
293                vec![*f.this],
294            )))),
295
296            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
297            Expression::UnixToTime(f) => {
298                let scale = f.scale.unwrap_or(0);
299                match scale {
300                    0 => Ok(Expression::Function(Box::new(Function::new(
301                        "TIMESTAMP_SECONDS".to_string(),
302                        vec![*f.this],
303                    )))),
304                    3 => Ok(Expression::Function(Box::new(Function::new(
305                        "TIMESTAMP_MILLIS".to_string(),
306                        vec![*f.this],
307                    )))),
308                    6 => Ok(Expression::Function(Box::new(Function::new(
309                        "TIMESTAMP_MICROS".to_string(),
310                        vec![*f.this],
311                    )))),
312                    _ => {
313                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
314                        let div_expr =
315                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
316                                *f.this,
317                                Expression::Function(Box::new(Function::new(
318                                    "POWER".to_string(),
319                                    vec![Expression::number(10), Expression::number(scale)],
320                                ))),
321                            )));
322                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
323                            this: div_expr,
324                            to: crate::expressions::DataType::Custom {
325                                name: "INT64".to_string(),
326                            },
327                            double_colon_syntax: false,
328                            trailing_comments: vec![],
329                            format: None,
330                            default: None,
331                        }));
332                        Ok(Expression::Function(Box::new(Function::new(
333                            "TIMESTAMP_SECONDS".to_string(),
334                            vec![cast_expr],
335                        ))))
336                    }
337                }
338            }
339
340            // ===== Date/time =====
341            // DateDiff -> DATE_DIFF in BigQuery
342            Expression::DateDiff(f) => {
343                // BigQuery: DATE_DIFF(date1, date2, part)
344                let unit_str = match f.unit {
345                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
346                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
347                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
348                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
349                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
350                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
351                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
352                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
353                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
354                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
355                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
356                    None => "DAY",
357                };
358                let unit = Expression::Identifier(crate::expressions::Identifier {
359                    name: unit_str.to_string(),
360                    quoted: false,
361                    trailing_comments: Vec::new(),
362                });
363                Ok(Expression::Function(Box::new(Function::new(
364                    "DATE_DIFF".to_string(),
365                    vec![f.this, f.expression, unit],
366                ))))
367            }
368
369            // ===== Variance =====
370            // VarPop -> VAR_POP
371            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
372                "VAR_POP".to_string(),
373                vec![f.this],
374            )))),
375
376            // ===== Hash functions =====
377            // SHA -> SHA1
378            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
379                "SHA1".to_string(),
380                vec![f.this],
381            )))),
382
383            // SHA1Digest -> SHA1
384            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
385                "SHA1".to_string(),
386                vec![f.this],
387            )))),
388
389            // MD5Digest -> MD5
390            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
391                "MD5".to_string(),
392                vec![*f.this],
393            )))),
394
395            // ===== Type conversion =====
396            // JSONBool -> BOOL
397            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
398                "BOOL".to_string(),
399                vec![f.this],
400            )))),
401
402            // StringFunc -> STRING
403            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
404                "STRING".to_string(),
405                vec![*f.this],
406            )))),
407
408            // ===== Date/time from parts =====
409            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
410            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
411                "DATE_FROM_UNIX_DATE".to_string(),
412                vec![f.this],
413            )))),
414
415            // UnixDate -> UNIX_DATE
416            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
417                "UNIX_DATE".to_string(),
418                vec![f.this],
419            )))),
420
421            // TimestampDiff -> TIMESTAMP_DIFF
422            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
423                "TIMESTAMP_DIFF".to_string(),
424                vec![*f.this, *f.expression],
425            )))),
426
427            // FromTimeZone -> DATETIME
428            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
429                "DATETIME".to_string(),
430                vec![*f.this],
431            )))),
432
433            // TsOrDsToDatetime -> DATETIME
434            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
435                "DATETIME".to_string(),
436                vec![f.this],
437            )))),
438
439            // TsOrDsToTimestamp -> TIMESTAMP
440            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
441                "TIMESTAMP".to_string(),
442                vec![f.this],
443            )))),
444
445            // ===== IfFunc -> IF in BigQuery =====
446            Expression::IfFunc(f) => {
447                let mut args = vec![f.condition, f.true_value];
448                if let Some(false_val) = f.false_value {
449                    args.push(false_val);
450                } else {
451                    args.push(Expression::Null(crate::expressions::Null));
452                }
453                Ok(Expression::Function(Box::new(Function::new(
454                    "IF".to_string(),
455                    args,
456                ))))
457            }
458
459            // ===== HexString -> FROM_HEX =====
460            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
461                "FROM_HEX".to_string(),
462                vec![*f.this],
463            )))),
464
465            // ===== Additional auto-generated transforms from Python sqlglot =====
466            // ApproxTopK -> APPROX_TOP_COUNT
467            Expression::ApproxTopK(f) => {
468                let mut args = vec![*f.this];
469                if let Some(expr) = f.expression {
470                    args.push(*expr);
471                }
472                Ok(Expression::Function(Box::new(Function::new(
473                    "APPROX_TOP_COUNT".to_string(),
474                    args,
475                ))))
476            }
477
478            // SafeDivide -> SAFE_DIVIDE
479            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
480                "SAFE_DIVIDE".to_string(),
481                vec![*f.this, *f.expression],
482            )))),
483
484            // JSONKeysAtDepth -> JSON_KEYS
485            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
486                "JSON_KEYS".to_string(),
487                vec![*f.this],
488            )))),
489
490            // JSONValueArray -> JSON_VALUE_ARRAY
491            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
492                "JSON_VALUE_ARRAY".to_string(),
493                vec![*f.this],
494            )))),
495
496            // DateFromParts -> DATE
497            Expression::DateFromParts(f) => {
498                let mut args = Vec::new();
499                if let Some(y) = f.year {
500                    args.push(*y);
501                }
502                if let Some(m) = f.month {
503                    args.push(*m);
504                }
505                if let Some(d) = f.day {
506                    args.push(*d);
507                }
508                Ok(Expression::Function(Box::new(Function::new(
509                    "DATE".to_string(),
510                    args,
511                ))))
512            }
513
514            // SPLIT: BigQuery defaults to comma separator when none provided
515            // SPLIT(foo) -> SPLIT(foo, ',')
516            Expression::Split(f) => {
517                // Check if delimiter is empty or a placeholder - add default comma
518                let delimiter = match &f.delimiter {
519                    Expression::Literal(Literal::String(s)) if s.is_empty() => {
520                        Expression::Literal(Literal::String(",".to_string()))
521                    }
522                    _ => f.delimiter,
523                };
524                Ok(Expression::Split(Box::new(SplitFunc {
525                    this: f.this,
526                    delimiter,
527                })))
528            }
529
530            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
531            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
532            Expression::Cast(c) => {
533                use crate::expressions::DataType;
534                // Check if casting to JSON - use PARSE_JSON instead
535                // Handle both DataType::Json/JsonB and DataType::Custom { name: "JSON" }
536                // (parser creates Custom for type literals like JSON 'string')
537                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
538                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
539                if is_json {
540                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
541                }
542                let transformed_type = match self.transform_data_type(c.to)? {
543                    Expression::DataType(dt) => dt,
544                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
545                };
546                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
547                    this: c.this,
548                    to: transformed_type,
549                    trailing_comments: c.trailing_comments,
550                    double_colon_syntax: c.double_colon_syntax,
551                    format: c.format,
552                    default: c.default,
553                })))
554            }
555
556            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
557            Expression::SafeCast(c) => {
558                let transformed_type = match self.transform_data_type(c.to)? {
559                    Expression::DataType(dt) => dt,
560                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0)),
561                };
562                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
563                    this: c.this,
564                    to: transformed_type,
565                    trailing_comments: c.trailing_comments,
566                    double_colon_syntax: c.double_colon_syntax,
567                    format: c.format,
568                    default: c.default,
569                })))
570            }
571
572            // ===== SELECT-level transforms =====
573            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
574            Expression::Select(mut select) => {
575                if select.group_by.is_some() && select.order_by.is_some() {
576                    // Build map: expression → alias name for aliased projections
577                    let aliases: Vec<(Expression, Identifier)> = select
578                        .expressions
579                        .iter()
580                        .filter_map(|e| {
581                            if let Expression::Alias(a) = e {
582                                Some((a.this.clone(), a.alias.clone()))
583                            } else {
584                                None
585                            }
586                        })
587                        .collect();
588
589                    if let Some(ref mut group_by) = select.group_by {
590                        for grouped in group_by.expressions.iter_mut() {
591                            // Skip numeric indices (already aliased)
592                            if matches!(grouped, Expression::Literal(Literal::Number(_))) {
593                                continue;
594                            }
595                            // Check if this GROUP BY expression matches a SELECT alias
596                            for (expr, alias_ident) in &aliases {
597                                if grouped == expr {
598                                    *grouped = Expression::Column(Column {
599                                        name: alias_ident.clone(),
600                                        table: None,
601                                        join_mark: false,
602                                        trailing_comments: Vec::new(),
603                                    });
604                                    break;
605                                }
606                            }
607                        }
608                    }
609                }
610                Ok(Expression::Select(select))
611            }
612
613            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
614            Expression::ArrayContains(f) => {
615                let array_expr = f.this;
616                let value_expr = f.expression;
617
618                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
619                let unnest = Expression::Unnest(Box::new(UnnestFunc {
620                    this: array_expr,
621                    expressions: Vec::new(),
622                    with_ordinality: false,
623                    alias: None,
624                    offset_alias: None,
625                }));
626                let aliased_unnest = Expression::Alias(Box::new(Alias {
627                    this: unnest,
628                    alias: Identifier::new("_col"),
629                    column_aliases: Vec::new(),
630                    pre_alias_comments: Vec::new(),
631                    trailing_comments: Vec::new(),
632                }));
633                let col_ref = Expression::Column(Column {
634                    name: Identifier::new("_col"),
635                    table: None,
636                    join_mark: false,
637                    trailing_comments: Vec::new(),
638                });
639                let where_clause = Where {
640                    this: Expression::Eq(Box::new(BinaryOp {
641                        left: col_ref,
642                        right: value_expr,
643                        left_comments: Vec::new(),
644                        operator_comments: Vec::new(),
645                        trailing_comments: Vec::new(),
646                    })),
647                };
648                let inner_select = Expression::Select(Box::new(Select {
649                    expressions: vec![Expression::Literal(Literal::Number("1".to_string()))],
650                    from: Some(From {
651                        expressions: vec![aliased_unnest],
652                    }),
653                    where_clause: Some(where_clause),
654                    ..Default::default()
655                }));
656                Ok(Expression::Exists(Box::new(Exists {
657                    this: inner_select,
658                    not: false,
659                })))
660            }
661
662            // ===== JSON_OBJECT array form → key-value pairs =====
663            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
664            Expression::JsonObject(mut f) => {
665                if f.pairs.len() == 1 {
666                    // Extract expressions from both Array and ArrayFunc variants
667                    let keys_exprs = match &f.pairs[0].0 {
668                        Expression::Array(arr) => Some(&arr.expressions),
669                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
670                        _ => None,
671                    };
672                    let vals_exprs = match &f.pairs[0].1 {
673                        Expression::Array(arr) => Some(&arr.expressions),
674                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
675                        _ => None,
676                    };
677                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
678                        if keys.len() == vals.len() {
679                            let new_pairs: Vec<(Expression, Expression)> = keys
680                                .iter()
681                                .zip(vals.iter())
682                                .map(|(k, v)| (k.clone(), v.clone()))
683                                .collect();
684                            f.pairs = new_pairs;
685                        }
686                    }
687                }
688                Ok(Expression::JsonObject(f))
689            }
690
691            // ===== MOD function: unwrap unnecessary Paren from first argument =====
692            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
693            Expression::ModFunc(mut f) => {
694                // Unwrap Paren from first argument if present
695                if let Expression::Paren(paren) = f.this {
696                    f.this = paren.this;
697                }
698                Ok(Expression::ModFunc(f))
699            }
700
701            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
702            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
703                let path = match *e.expression {
704                    Expression::Literal(Literal::String(s)) => {
705                        let normalized = if s.starts_with('$') {
706                            s
707                        } else if s.starts_with('[') {
708                            format!("${}", s)
709                        } else {
710                            format!("$.{}", s)
711                        };
712                        Expression::Literal(Literal::String(normalized))
713                    }
714                    other => other,
715                };
716                Ok(Expression::Function(Box::new(Function::new(
717                    "JSON_EXTRACT".to_string(),
718                    vec![*e.this, path],
719                ))))
720            }
721
722            // Generic function transformations
723            Expression::Function(f) => self.transform_function(*f),
724
725            // Generic aggregate function transformations
726            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
727
728            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
729            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
730            Expression::MethodCall(mc) => self.transform_method_call(*mc),
731
732            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
733            // and set is_table_function = true for TABLE FUNCTION syntax
734            Expression::CreateFunction(mut cf) => {
735                if let Some(ref mut rtb) = cf.returns_table_body {
736                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
737                        // Convert TABLE (...) to TABLE <...> with BigQuery types
738                        let inner = if rtb.starts_with("TABLE (") {
739                            &rtb["TABLE (".len()..rtb.len() - 1]
740                        } else {
741                            &rtb["TABLE(".len()..rtb.len() - 1]
742                        };
743                        // Convert common types to BigQuery equivalents
744                        let converted = inner
745                            .replace(" INT,", " INT64,")
746                            .replace(" INT)", " INT64)")
747                            .replace(" INTEGER,", " INT64,")
748                            .replace(" INTEGER)", " INT64)")
749                            .replace(" FLOAT,", " FLOAT64,")
750                            .replace(" FLOAT)", " FLOAT64)")
751                            .replace(" BOOLEAN,", " BOOL,")
752                            .replace(" BOOLEAN)", " BOOL)")
753                            .replace(" VARCHAR", " STRING")
754                            .replace(" TEXT", " STRING");
755                        // Handle trailing type (no comma, no paren)
756                        let converted = if converted.ends_with(" INT") {
757                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
758                        } else {
759                            converted
760                        };
761                        *rtb = format!("TABLE <{}>", converted);
762                        cf.is_table_function = true;
763                    }
764                }
765                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
766                if cf.is_table_function {
767                    if let Some(ref body) = cf.body {
768                        if matches!(body, FunctionBody::StringLiteral(_)) {
769                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
770                                // Parse the SQL string into an expression
771                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
772                                    if let Some(stmt) = parsed.into_iter().next() {
773                                        cf.body = Some(FunctionBody::Expression(stmt));
774                                    } else {
775                                        cf.body = Some(FunctionBody::StringLiteral(sql));
776                                    }
777                                } else {
778                                    cf.body = Some(FunctionBody::StringLiteral(sql));
779                                }
780                            }
781                        }
782                    }
783                }
784                Ok(Expression::CreateFunction(cf))
785            }
786
787            // Pass through everything else
788            _ => Ok(expr),
789        }
790    }
791}
792
793impl BigQueryDialect {
794    /// Transform data types according to BigQuery TYPE_MAPPING
795    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
796        use crate::expressions::DataType;
797        let transformed = match dt {
798            // BIGINT -> INT64
799            DataType::BigInt { .. } => DataType::Custom {
800                name: "INT64".to_string(),
801            },
802            // INT -> INT64
803            DataType::Int { .. } => DataType::Custom {
804                name: "INT64".to_string(),
805            },
806            // SMALLINT -> INT64
807            DataType::SmallInt { .. } => DataType::Custom {
808                name: "INT64".to_string(),
809            },
810            // TINYINT -> INT64
811            DataType::TinyInt { .. } => DataType::Custom {
812                name: "INT64".to_string(),
813            },
814            // FLOAT -> FLOAT64
815            DataType::Float { .. } => DataType::Custom {
816                name: "FLOAT64".to_string(),
817            },
818            // DOUBLE -> FLOAT64
819            DataType::Double { .. } => DataType::Custom {
820                name: "FLOAT64".to_string(),
821            },
822            // BOOLEAN -> BOOL
823            DataType::Boolean => DataType::Custom {
824                name: "BOOL".to_string(),
825            },
826            // CHAR -> STRING
827            DataType::Char { .. } => DataType::Custom {
828                name: "STRING".to_string(),
829            },
830            // VARCHAR -> STRING
831            DataType::VarChar { .. } => DataType::Custom {
832                name: "STRING".to_string(),
833            },
834            // TEXT -> STRING
835            DataType::Text => DataType::Custom {
836                name: "STRING".to_string(),
837            },
838            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
839            DataType::String { .. } => DataType::Custom {
840                name: "STRING".to_string(),
841            },
842            // BINARY -> BYTES
843            DataType::Binary { .. } => DataType::Custom {
844                name: "BYTES".to_string(),
845            },
846            // VARBINARY -> BYTES
847            DataType::VarBinary { .. } => DataType::Custom {
848                name: "BYTES".to_string(),
849            },
850            // BLOB -> BYTES
851            DataType::Blob => DataType::Custom {
852                name: "BYTES".to_string(),
853            },
854            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
855            DataType::Decimal { .. } => DataType::Custom {
856                name: "NUMERIC".to_string(),
857            },
858            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
859            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
860            // be converted to DATETIME by the generator
861            DataType::Timestamp {
862                timezone: false, ..
863            } => DataType::Custom {
864                name: "TIMESTAMP".to_string(),
865            },
866            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
867                name: "TIMESTAMP".to_string(),
868            },
869            // UUID -> STRING (BigQuery doesn't have native UUID type)
870            DataType::Uuid => DataType::Custom {
871                name: "STRING".to_string(),
872            },
873            // RECORD -> STRUCT in BigQuery
874            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
875                DataType::Custom {
876                    name: "STRUCT".to_string(),
877                }
878            }
879            // TIMESTAMPTZ (custom) -> TIMESTAMP
880            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
881                DataType::Custom {
882                    name: "TIMESTAMP".to_string(),
883                }
884            }
885            // BYTEINT (custom) -> INT64
886            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
887                DataType::Custom {
888                    name: "INT64".to_string(),
889                }
890            }
891            // Keep all other types as-is
892            other => other,
893        };
894        Ok(Expression::DataType(transformed))
895    }
896
897    fn transform_function(&self, f: Function) -> Result<Expression> {
898        let name_upper = f.name.to_uppercase();
899        match name_upper.as_str() {
900            // IFNULL -> COALESCE (both work in BigQuery)
901            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
902                original_name: None,
903                expressions: f.args,
904            }))),
905
906            // NVL -> COALESCE
907            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
908                original_name: None,
909                expressions: f.args,
910            }))),
911
912            // ISNULL -> COALESCE
913            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
914                original_name: None,
915                expressions: f.args,
916            }))),
917
918            // GROUP_CONCAT -> STRING_AGG in BigQuery
919            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
920                Function::new("STRING_AGG".to_string(), f.args),
921            ))),
922
923            // SUBSTR -> SUBSTRING (both work)
924            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
925                "SUBSTRING".to_string(),
926                f.args,
927            )))),
928
929            // RANDOM -> RAND
930            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
931                seed: None,
932                lower: None,
933                upper: None,
934            }))),
935
936            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
937            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
938            "CURRENT_DATE" if f.args.is_empty() => {
939                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
940            }
941            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
942                name: "CURRENT_DATE".to_string(),
943                args: f.args,
944                distinct: false,
945                trailing_comments: Vec::new(),
946                use_bracket_syntax: false,
947                no_parens: false,
948                quoted: false,
949            }))),
950
951            // NOW -> CURRENT_TIMESTAMP in BigQuery
952            "NOW" => Ok(Expression::CurrentTimestamp(
953                crate::expressions::CurrentTimestamp {
954                    precision: None,
955                    sysdate: false,
956                },
957            )),
958
959            // TO_DATE -> PARSE_DATE in BigQuery
960            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
961                "PARSE_DATE".to_string(),
962                f.args,
963            )))),
964
965            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
966            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
967                "PARSE_TIMESTAMP".to_string(),
968                f.args,
969            )))),
970
971            // TO_TIME -> TIME in BigQuery
972            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
973                "TIME".to_string(),
974                f.args,
975            )))),
976
977            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
978            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
979                "FORMAT_DATE".to_string(),
980                f.args,
981            )))),
982
983            // POSITION -> STRPOS in BigQuery
984            // BigQuery uses STRPOS(string, substring)
985            "POSITION" if f.args.len() == 2 => {
986                let mut args = f.args;
987                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
988                let first = args.remove(0);
989                let second = args.remove(0);
990                Ok(Expression::Function(Box::new(Function::new(
991                    "STRPOS".to_string(),
992                    vec![second, first],
993                ))))
994            }
995
996            // LEN -> LENGTH
997            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
998                f.args.into_iter().next().unwrap(),
999            )))),
1000
1001            // CEILING -> CEIL (both work)
1002            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1003                this: f.args.into_iter().next().unwrap(),
1004                decimals: None,
1005                to: None,
1006            }))),
1007
1008            // GETDATE -> CURRENT_TIMESTAMP
1009            "GETDATE" => Ok(Expression::CurrentTimestamp(
1010                crate::expressions::CurrentTimestamp {
1011                    precision: None,
1012                    sysdate: false,
1013                },
1014            )),
1015
1016            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1017            // CARDINALITY -> ARRAY_LENGTH
1018            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1019                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1020            ))),
1021
1022            // UNNEST is native to BigQuery
1023
1024            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1025            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1026                "GENERATE_ARRAY".to_string(),
1027                f.args,
1028            )))),
1029
1030            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1031            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1032            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1033                "APPROX_COUNT_DISTINCT".to_string(),
1034                f.args,
1035            )))),
1036
1037            // COUNT_IF -> COUNTIF in BigQuery
1038            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1039                "COUNTIF".to_string(),
1040                f.args,
1041            )))),
1042
1043            // SHA1 -> SHA1 (native), SHA -> SHA1
1044            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1045                "SHA1".to_string(),
1046                f.args,
1047            )))),
1048
1049            // SHA256/SHA2 -> SHA256
1050            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1051                "SHA256".to_string(),
1052                f.args,
1053            )))),
1054
1055            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1056            // TO_HEX(MD5(x)) pattern
1057            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1058                "MD5".to_string(),
1059                f.args,
1060            )))),
1061
1062            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1063            // VAR_POP -> VAR_POP (native)
1064
1065            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1066            "DATEADD" if f.args.len() == 3 => {
1067                let mut args = f.args;
1068                let unit_expr = args.remove(0);
1069                let amount = args.remove(0);
1070                let date = args.remove(0);
1071                // Convert unit identifier to IntervalUnit
1072                let unit_name = match &unit_expr {
1073                    Expression::Identifier(id) => id.name.to_uppercase(),
1074                    _ => "DAY".to_string(),
1075                };
1076                let unit = match unit_name.as_str() {
1077                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1078                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1079                        crate::expressions::IntervalUnit::Quarter
1080                    }
1081                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1082                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1083                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1084                        crate::expressions::IntervalUnit::Day
1085                    }
1086                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1087                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1088                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1089                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1090                        crate::expressions::IntervalUnit::Millisecond
1091                    }
1092                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1093                        crate::expressions::IntervalUnit::Microsecond
1094                    }
1095                    _ => crate::expressions::IntervalUnit::Day,
1096                };
1097                Ok(Expression::DateAdd(Box::new(
1098                    crate::expressions::DateAddFunc {
1099                        this: date,
1100                        interval: amount,
1101                        unit,
1102                    },
1103                )))
1104            }
1105            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1106                "DATE_ADD".to_string(),
1107                f.args,
1108            )))),
1109
1110            // DATE_DIFF in BigQuery (native)
1111            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1112                "DATE_DIFF".to_string(),
1113                f.args,
1114            )))),
1115
1116            // TIMESTAMP_DIFF in BigQuery
1117            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1118                "TIMESTAMP_DIFF".to_string(),
1119                f.args,
1120            )))),
1121
1122            // TIME -> TIME (native)
1123            // DATETIME -> DATETIME (native)
1124
1125            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1126
1127            // NEWID/UUID -> GENERATE_UUID
1128            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1129                "GENERATE_UUID".to_string(),
1130                vec![],
1131            )))),
1132
1133            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1134            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1135                "EDIT_DISTANCE".to_string(),
1136                f.args,
1137            )))),
1138
1139            // UNIX_TIMESTAMP -> UNIX_SECONDS
1140            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1141                "UNIX_SECONDS".to_string(),
1142                f.args,
1143            )))),
1144
1145            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1146            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1147                "TIMESTAMP_SECONDS".to_string(),
1148                f.args,
1149            )))),
1150
1151            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1152            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1153                Function::new("LENGTH".to_string(), f.args),
1154            ))),
1155
1156            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1157            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1158                "BYTE_LENGTH".to_string(),
1159                f.args,
1160            )))),
1161
1162            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1163            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1164                "JSON_VALUE_ARRAY".to_string(),
1165                f.args,
1166            )))),
1167
1168            // INSTR is native to BigQuery
1169
1170            // SPLIT: BigQuery defaults to comma separator when none provided
1171            // SPLIT(foo) -> SPLIT(foo, ',')
1172            "SPLIT" if f.args.len() == 1 => {
1173                let mut args = f.args;
1174                args.push(Expression::Literal(Literal::String(",".to_string())));
1175                Ok(Expression::Split(Box::new(SplitFunc {
1176                    this: args.remove(0),
1177                    delimiter: args.remove(0),
1178                })))
1179            }
1180
1181            // SPLIT with two args - convert to Split expression
1182            "SPLIT" if f.args.len() == 2 => {
1183                let mut args = f.args;
1184                Ok(Expression::Split(Box::new(SplitFunc {
1185                    this: args.remove(0),
1186                    delimiter: args.remove(0),
1187                })))
1188            }
1189
1190            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1191            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1192                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1193                let args = if f.args.len() > 4 {
1194                    f.args[..4].to_vec()
1195                } else {
1196                    f.args
1197                };
1198                Ok(Expression::Function(Box::new(Function::new(
1199                    "REGEXP_EXTRACT".to_string(),
1200                    args,
1201                ))))
1202            }
1203            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1204                "REGEXP_EXTRACT".to_string(),
1205                f.args,
1206            )))),
1207
1208            // REGEXP_REPLACE - strip extra Snowflake-specific args
1209            "REGEXP_REPLACE" if f.args.len() > 3 => {
1210                let args = f.args[..3].to_vec();
1211                Ok(Expression::Function(Box::new(Function::new(
1212                    "REGEXP_REPLACE".to_string(),
1213                    args,
1214                ))))
1215            }
1216
1217            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1218            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1219                "JSON_OBJECT".to_string(),
1220                f.args,
1221            )))),
1222
1223            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1224            "EDITDISTANCE" if f.args.len() == 3 => {
1225                let col1 = f.args[0].clone();
1226                let col2 = f.args[1].clone();
1227                let max_dist = f.args[2].clone();
1228                Ok(Expression::Function(Box::new(Function::new(
1229                    "EDIT_DISTANCE".to_string(),
1230                    vec![
1231                        col1,
1232                        col2,
1233                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1234                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1235                            value: max_dist,
1236                            separator: crate::expressions::NamedArgSeparator::DArrow,
1237                        })),
1238                    ],
1239                ))))
1240            }
1241            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1242                Function::new("EDIT_DISTANCE".to_string(), f.args),
1243            ))),
1244
1245            // HEX_DECODE_BINARY -> FROM_HEX
1246            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1247                "FROM_HEX".to_string(),
1248                f.args,
1249            )))),
1250
1251            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1252            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1253            "PARSE_DATE"
1254            | "PARSE_DATETIME"
1255            | "PARSE_TIMESTAMP"
1256            | "SAFE.PARSE_DATE"
1257            | "SAFE.PARSE_DATETIME"
1258            | "SAFE.PARSE_TIMESTAMP" => {
1259                let args = self.normalize_time_format_args(f.args);
1260                Ok(Expression::Function(Box::new(Function {
1261                    name: f.name,
1262                    args,
1263                    distinct: f.distinct,
1264                    no_parens: f.no_parens,
1265                    trailing_comments: f.trailing_comments,
1266                    quoted: f.quoted,
1267                    use_bracket_syntax: f.use_bracket_syntax,
1268                })))
1269            }
1270
1271            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1272            "GET_PATH" if f.args.len() == 2 => {
1273                let mut args = f.args;
1274                let this = args.remove(0);
1275                let path = args.remove(0);
1276                let json_path = match &path {
1277                    Expression::Literal(Literal::String(s)) => {
1278                        let normalized = if s.starts_with('$') {
1279                            s.clone()
1280                        } else if s.starts_with('[') {
1281                            format!("${}", s)
1282                        } else {
1283                            format!("$.{}", s)
1284                        };
1285                        Expression::Literal(Literal::String(normalized))
1286                    }
1287                    _ => path,
1288                };
1289                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1290                    this,
1291                    path: json_path,
1292                    returning: None,
1293                    arrow_syntax: false,
1294                    hash_arrow_syntax: false,
1295                    wrapper_option: None,
1296                    quotes_option: None,
1297                    on_scalar_string: false,
1298                    on_error: None,
1299                })))
1300            }
1301
1302            // Pass through everything else
1303            _ => Ok(Expression::Function(Box::new(f))),
1304        }
1305    }
1306
1307    fn transform_aggregate_function(
1308        &self,
1309        f: Box<crate::expressions::AggregateFunction>,
1310    ) -> Result<Expression> {
1311        let name_upper = f.name.to_uppercase();
1312        match name_upper.as_str() {
1313            // GROUP_CONCAT -> STRING_AGG
1314            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1315                Function::new("STRING_AGG".to_string(), f.args),
1316            ))),
1317
1318            // Pass through everything else
1319            _ => Ok(Expression::AggregateFunction(f)),
1320        }
1321    }
1322
1323    /// Transform MethodCall expressions
1324    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1325    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1326        use crate::expressions::MethodCall;
1327
1328        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1329        if let Expression::Column(ref col) = mc.this {
1330            if col.name.name.eq_ignore_ascii_case("SAFE") {
1331                let method_upper = mc.method.name.to_uppercase();
1332                if method_upper == "PARSE_DATE"
1333                    || method_upper == "PARSE_DATETIME"
1334                    || method_upper == "PARSE_TIMESTAMP"
1335                {
1336                    // Normalize the format string in the first argument
1337                    let args = self.normalize_time_format_args(mc.args);
1338                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1339                        this: mc.this,
1340                        method: mc.method,
1341                        args,
1342                    })));
1343                }
1344            }
1345        }
1346
1347        // Pass through all other method calls
1348        Ok(Expression::MethodCall(Box::new(mc)))
1349    }
1350
1351    /// Normalize time format strings in function arguments
1352    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1353    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1354        args.into_iter()
1355            .enumerate()
1356            .map(|(i, arg)| {
1357                // Only transform the first argument (the format string)
1358                if i == 0 {
1359                    if let Expression::Literal(Literal::String(s)) = arg {
1360                        let normalized = self.normalize_time_format(&s);
1361                        return Expression::Literal(Literal::String(normalized));
1362                    }
1363                }
1364                arg
1365            })
1366            .collect()
1367    }
1368
1369    /// Normalize a time format string according to BigQuery conventions
1370    /// %Y-%m-%d -> %F (ISO date)
1371    /// %H:%M:%S -> %T (time)
1372    fn normalize_time_format(&self, format: &str) -> String {
1373        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1374    }
1375}
1376
1377#[cfg(test)]
1378mod tests {
1379    use super::*;
1380    use crate::dialects::Dialect;
1381
1382    fn transpile_to_bigquery(sql: &str) -> String {
1383        let dialect = Dialect::get(DialectType::Generic);
1384        let result = dialect
1385            .transpile_to(sql, DialectType::BigQuery)
1386            .expect("Transpile failed");
1387        result[0].clone()
1388    }
1389
1390    #[test]
1391    fn test_ifnull_identity() {
1392        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1393        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1394        assert!(
1395            result.contains("COALESCE"),
1396            "Expected COALESCE, got: {}",
1397            result
1398        );
1399    }
1400
1401    #[test]
1402    fn test_nvl_to_ifnull() {
1403        // NVL is converted to IFNULL in BigQuery
1404        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1405        assert!(
1406            result.contains("IFNULL"),
1407            "Expected IFNULL, got: {}",
1408            result
1409        );
1410    }
1411
1412    #[test]
1413    fn test_try_cast_to_safe_cast() {
1414        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1415        assert!(
1416            result.contains("SAFE_CAST"),
1417            "Expected SAFE_CAST, got: {}",
1418            result
1419        );
1420    }
1421
1422    #[test]
1423    fn test_random_to_rand() {
1424        let result = transpile_to_bigquery("SELECT RANDOM()");
1425        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1426    }
1427
1428    #[test]
1429    fn test_basic_select() {
1430        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1431        assert!(result.contains("SELECT"));
1432        assert!(result.contains("FROM users"));
1433    }
1434
1435    #[test]
1436    fn test_group_concat_to_string_agg() {
1437        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1438        assert!(
1439            result.contains("STRING_AGG"),
1440            "Expected STRING_AGG, got: {}",
1441            result
1442        );
1443    }
1444
1445    #[test]
1446    fn test_generate_series_to_generate_array() {
1447        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1448        assert!(
1449            result.contains("GENERATE_ARRAY"),
1450            "Expected GENERATE_ARRAY, got: {}",
1451            result
1452        );
1453    }
1454
1455    #[test]
1456    fn test_backtick_identifiers() {
1457        // BigQuery uses backticks for identifiers
1458        let dialect = BigQueryDialect;
1459        let config = dialect.generator_config();
1460        assert_eq!(config.identifier_quote, '`');
1461    }
1462
1463    fn bigquery_identity(sql: &str, expected: &str) {
1464        let dialect = Dialect::get(DialectType::BigQuery);
1465        let ast = dialect.parse(sql).expect("Parse failed");
1466        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1467        let result = dialect.generate(&transformed).expect("Generate failed");
1468        assert_eq!(result, expected, "SQL: {}", sql);
1469    }
1470
1471    #[test]
1472    fn test_cast_char_to_string() {
1473        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1474    }
1475
1476    #[test]
1477    fn test_cast_varchar_to_string() {
1478        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1479    }
1480
1481    #[test]
1482    fn test_cast_nchar_to_string() {
1483        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1484    }
1485
1486    #[test]
1487    fn test_cast_nvarchar_to_string() {
1488        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1489    }
1490
1491    #[test]
1492    fn test_cast_timestamptz_to_timestamp() {
1493        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1494    }
1495
1496    #[test]
1497    fn test_cast_record_to_struct() {
1498        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1499    }
1500
1501    #[test]
1502    fn test_json_literal_to_parse_json() {
1503        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1504        bigquery_identity(
1505            "SELECT JSON '\"foo\"' AS json_data",
1506            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1507        );
1508    }
1509
1510    #[test]
1511    fn test_grant_as_alias_not_quoted() {
1512        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1513        bigquery_identity(
1514            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1515            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1516        );
1517    }
1518
1519    #[test]
1520    fn test_timestamp_literal_to_cast() {
1521        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1522        bigquery_identity(
1523            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1524            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1525        );
1526    }
1527
1528    #[test]
1529    fn test_date_literal_to_cast_in_extract() {
1530        // Issue 1: DATE literal should become CAST syntax in BigQuery
1531        bigquery_identity(
1532            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1533            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1534        );
1535    }
1536
1537    #[test]
1538    fn test_json_object_with_json_literals() {
1539        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1540        bigquery_identity(
1541            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1542            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1543        );
1544    }
1545
1546    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1547    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1548    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1549
1550    #[test]
1551    fn test_safe_parse_date_format_normalization() {
1552        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1553        bigquery_identity(
1554            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1555            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1556        );
1557    }
1558
1559    #[test]
1560    fn test_safe_parse_datetime_format_normalization() {
1561        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1562        bigquery_identity(
1563            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1564            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1565        );
1566    }
1567
1568    #[test]
1569    fn test_safe_parse_timestamp_format_normalization() {
1570        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1571        bigquery_identity(
1572            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1573            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1574        );
1575    }
1576
1577    #[test]
1578    fn test_datetime_literal_to_cast() {
1579        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1580        bigquery_identity(
1581            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1582            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1583        );
1584    }
1585
1586    #[test]
1587    fn test_last_day_week_modifier_stripped() {
1588        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1589        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1590    }
1591}