Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19#[cfg(feature = "generate")]
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// BigQuery dialect
24pub struct BigQueryDialect;
25
26impl DialectImpl for BigQueryDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::BigQuery
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        let mut config = TokenizerConfig::default();
33        // BigQuery uses backticks for identifiers, NOT double quotes
34        // Remove double quote from identifiers (it's in the default config)
35        config.identifiers.remove(&'"');
36        config.identifiers.insert('`', '`');
37        // BigQuery supports double quotes for strings (in addition to single quotes)
38        config.quotes.insert("\"".to_string(), "\"".to_string());
39        // BigQuery supports triple-quoted strings
40        config.quotes.insert("'''".to_string(), "'''".to_string());
41        config
42            .quotes
43            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
44        // BigQuery supports backslash escaping in strings
45        config.string_escapes = vec!['\'', '\\'];
46        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
47        config.b_prefix_is_byte_string = true;
48        // BigQuery supports hex number strings like 0xA, 0xFF
49        config.hex_number_strings = true;
50        // BigQuery: 0xA represents integer 10 (not binary/blob)
51        config.hex_string_is_integer_type = true;
52        // BigQuery supports # as single-line comments.
53        config.hash_comments = true;
54        config
55    }
56
57    #[cfg(feature = "generate")]
58
59    fn generator_config(&self) -> GeneratorConfig {
60        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
61        GeneratorConfig {
62            identifier_quote: '`',
63            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
64            dialect: Some(DialectType::BigQuery),
65            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
66            normalize_functions: NormalizeFunctions::None,
67            // BigQuery-specific settings from Python sqlglot
68            interval_allows_plural_form: false,
69            join_hints: false,
70            query_hints: false,
71            table_hints: false,
72            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
73            rename_table_with_db: false,
74            nvl2_supported: false,
75            unnest_with_ordinality: false,
76            collate_is_func: true,
77            limit_only_literals: true,
78            supports_table_alias_columns: false,
79            unpivot_aliases_are_identifiers: false,
80            json_key_value_pair_sep: ",",
81            null_ordering_supported: false,
82            ignore_nulls_in_func: true,
83            json_path_single_quote_escape: true,
84            can_implement_array_any: true,
85            supports_to_number: false,
86            named_placeholder_token: "@",
87            hex_func: "TO_HEX",
88            with_properties_prefix: "OPTIONS",
89            supports_exploding_projections: false,
90            except_intersect_support_all_clause: false,
91            supports_unix_seconds: true,
92            // BigQuery uses SAFE_ prefix for safe operations
93            try_supported: true,
94            // BigQuery does not support SEMI/ANTI JOIN syntax
95            semi_anti_join_with_side: false,
96            ..Default::default()
97        }
98    }
99
100    #[cfg(feature = "transpile")]
101
102    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
103        match expr {
104            // ===== Data Type Mappings =====
105            Expression::DataType(dt) => self.transform_data_type(dt),
106
107            // ===== Null handling =====
108            // IFNULL is native to BigQuery - keep as-is for identity
109            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
110
111            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
112            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
113
114            // Coalesce stays as Coalesce
115            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
116
117            // ===== String aggregation =====
118            // GROUP_CONCAT -> STRING_AGG in BigQuery
119            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
120                this: f.this,
121                separator: f.separator,
122                order_by: f.order_by,
123                distinct: f.distinct,
124                filter: f.filter,
125                limit: None,
126                inferred_type: None,
127            }))),
128
129            // ===== Cast operations =====
130            // Cast data types are transformed by transform_recursive in mod.rs
131            // which calls transform_data_type via transform_expr(Expression::DataType(...))
132
133            // TryCast -> SafeCast in BigQuery with type transformation
134            Expression::TryCast(c) => {
135                let transformed_type = match self.transform_data_type(c.to)? {
136                    Expression::DataType(dt) => dt,
137                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
138                };
139                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
140                    this: c.this,
141                    to: transformed_type,
142                    trailing_comments: c.trailing_comments,
143                    double_colon_syntax: c.double_colon_syntax,
144                    format: c.format,
145                    default: c.default,
146                    inferred_type: None,
147                })))
148            }
149
150            // ===== Pattern matching =====
151            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
152            Expression::ILike(op) => {
153                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
154                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
155                Ok(Expression::Like(Box::new(LikeOp {
156                    left: lower_left,
157                    right: lower_right,
158                    escape: op.escape,
159                    quantifier: op.quantifier,
160                    inferred_type: None,
161                })))
162            }
163
164            // RegexpLike -> REGEXP_CONTAINS in BigQuery
165            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
166                "REGEXP_CONTAINS".to_string(),
167                vec![f.this, f.pattern],
168            )))),
169
170            // ===== Array operations =====
171            // EXPLODE -> UNNEST in BigQuery
172            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
173                crate::expressions::UnnestFunc {
174                    this: f.this,
175                    expressions: Vec::new(),
176                    with_ordinality: false,
177                    alias: None,
178                    offset_alias: None,
179                },
180            ))),
181
182            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
183            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
184                crate::expressions::UnnestFunc {
185                    this: f.this,
186                    expressions: Vec::new(),
187                    with_ordinality: false,
188                    alias: None,
189                    offset_alias: None,
190                },
191            ))),
192
193            // GenerateSeries -> GENERATE_ARRAY in BigQuery
194            Expression::GenerateSeries(f) => {
195                let mut args = Vec::new();
196                if let Some(start) = f.start {
197                    args.push(*start);
198                }
199                if let Some(end) = f.end {
200                    args.push(*end);
201                }
202                if let Some(step) = f.step {
203                    args.push(*step);
204                }
205                Ok(Expression::Function(Box::new(Function::new(
206                    "GENERATE_ARRAY".to_string(),
207                    args,
208                ))))
209            }
210
211            // ===== Bitwise operations =====
212            // BitwiseAndAgg -> BIT_AND
213            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
214                "BIT_AND".to_string(),
215                vec![f.this],
216            )))),
217
218            // BitwiseOrAgg -> BIT_OR
219            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
220                "BIT_OR".to_string(),
221                vec![f.this],
222            )))),
223
224            // BitwiseXorAgg -> BIT_XOR
225            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
226                "BIT_XOR".to_string(),
227                vec![f.this],
228            )))),
229
230            // BitwiseCount -> BIT_COUNT
231            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
232                "BIT_COUNT".to_string(),
233                vec![f.this],
234            )))),
235
236            // ByteLength -> BYTE_LENGTH
237            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
238                "BYTE_LENGTH".to_string(),
239                vec![f.this],
240            )))),
241
242            // IntDiv -> DIV
243            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
244                "DIV".to_string(),
245                vec![f.this, f.expression],
246            )))),
247
248            // Int64 -> INT64
249            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
250                "INT64".to_string(),
251                vec![f.this],
252            )))),
253
254            // ===== Random =====
255            // RANDOM -> RAND in BigQuery
256            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
257                seed: None,
258                lower: None,
259                upper: None,
260            }))),
261
262            // ===== UUID =====
263            // Uuid -> GENERATE_UUID in BigQuery
264            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
265                "GENERATE_UUID".to_string(),
266                vec![],
267            )))),
268
269            // ===== Approximate functions =====
270            // ApproxDistinct -> APPROX_COUNT_DISTINCT
271            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
272                "APPROX_COUNT_DISTINCT".to_string(),
273                vec![f.this],
274            )))),
275
276            // ArgMax -> MAX_BY in BigQuery
277            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
278                "MAX_BY".to_string(),
279                vec![*f.this, *f.expression],
280            )))),
281
282            // ArgMin -> MIN_BY in BigQuery
283            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
284                "MIN_BY".to_string(),
285                vec![*f.this, *f.expression],
286            )))),
287
288            // ===== Conditional =====
289            // CountIf -> COUNTIF in BigQuery
290            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
291                "COUNTIF".to_string(),
292                vec![f.this],
293            )))),
294
295            // ===== String functions =====
296            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
297            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
298
299            // ===== Conversion =====
300            // Unhex -> FROM_HEX
301            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
302                "FROM_HEX".to_string(),
303                vec![*f.this],
304            )))),
305
306            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
307            Expression::UnixToTime(f) => {
308                let scale = f.scale.unwrap_or(0);
309                match scale {
310                    0 => Ok(Expression::Function(Box::new(Function::new(
311                        "TIMESTAMP_SECONDS".to_string(),
312                        vec![*f.this],
313                    )))),
314                    3 => Ok(Expression::Function(Box::new(Function::new(
315                        "TIMESTAMP_MILLIS".to_string(),
316                        vec![*f.this],
317                    )))),
318                    6 => Ok(Expression::Function(Box::new(Function::new(
319                        "TIMESTAMP_MICROS".to_string(),
320                        vec![*f.this],
321                    )))),
322                    _ => {
323                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
324                        let div_expr =
325                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
326                                *f.this,
327                                Expression::Function(Box::new(Function::new(
328                                    "POWER".to_string(),
329                                    vec![Expression::number(10), Expression::number(scale)],
330                                ))),
331                            )));
332                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
333                            this: div_expr,
334                            to: crate::expressions::DataType::Custom {
335                                name: "INT64".to_string(),
336                            },
337                            double_colon_syntax: false,
338                            trailing_comments: vec![],
339                            format: None,
340                            default: None,
341                            inferred_type: None,
342                        }));
343                        Ok(Expression::Function(Box::new(Function::new(
344                            "TIMESTAMP_SECONDS".to_string(),
345                            vec![cast_expr],
346                        ))))
347                    }
348                }
349            }
350
351            // ===== Date/time =====
352            // DateDiff -> DATE_DIFF in BigQuery
353            Expression::DateDiff(f) => {
354                // BigQuery: DATE_DIFF(date1, date2, part)
355                let unit_str = match f.unit {
356                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
357                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
358                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
359                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
360                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
361                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
362                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
363                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
364                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
365                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
366                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
367                    None => "DAY",
368                };
369                let unit = Expression::Identifier(crate::expressions::Identifier {
370                    name: unit_str.to_string(),
371                    quoted: false,
372                    trailing_comments: Vec::new(),
373                    span: None,
374                });
375                Ok(Expression::Function(Box::new(Function::new(
376                    "DATE_DIFF".to_string(),
377                    vec![f.this, f.expression, unit],
378                ))))
379            }
380
381            // ===== Variance =====
382            // VarPop -> VAR_POP
383            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
384                "VAR_POP".to_string(),
385                vec![f.this],
386            )))),
387
388            // ===== Hash functions =====
389            // SHA -> SHA1
390            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
391                "SHA1".to_string(),
392                vec![f.this],
393            )))),
394
395            // SHA1Digest -> SHA1
396            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
397                "SHA1".to_string(),
398                vec![f.this],
399            )))),
400
401            // MD5Digest -> MD5
402            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
403                "MD5".to_string(),
404                vec![*f.this],
405            )))),
406
407            // ===== Type conversion =====
408            // JSONBool -> BOOL
409            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
410                "BOOL".to_string(),
411                vec![f.this],
412            )))),
413
414            // StringFunc -> STRING
415            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
416                "STRING".to_string(),
417                vec![*f.this],
418            )))),
419
420            // ===== Date/time from parts =====
421            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
422            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
423                "DATE_FROM_UNIX_DATE".to_string(),
424                vec![f.this],
425            )))),
426
427            // UnixDate -> UNIX_DATE
428            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
429                "UNIX_DATE".to_string(),
430                vec![f.this],
431            )))),
432
433            // TimestampDiff -> TIMESTAMP_DIFF
434            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
435                "TIMESTAMP_DIFF".to_string(),
436                vec![*f.this, *f.expression],
437            )))),
438
439            // FromTimeZone -> DATETIME
440            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
441                "DATETIME".to_string(),
442                vec![*f.this],
443            )))),
444
445            // TsOrDsToDatetime -> DATETIME
446            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
447                "DATETIME".to_string(),
448                vec![f.this],
449            )))),
450
451            // TsOrDsToTimestamp -> TIMESTAMP
452            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
453                "TIMESTAMP".to_string(),
454                vec![f.this],
455            )))),
456
457            // ===== IfFunc -> IF in BigQuery =====
458            Expression::IfFunc(f) => {
459                let mut args = vec![f.condition, f.true_value];
460                if let Some(false_val) = f.false_value {
461                    args.push(false_val);
462                } else {
463                    args.push(Expression::Null(crate::expressions::Null));
464                }
465                Ok(Expression::Function(Box::new(Function::new(
466                    "IF".to_string(),
467                    args,
468                ))))
469            }
470
471            // ===== HexString -> FROM_HEX =====
472            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
473                "FROM_HEX".to_string(),
474                vec![*f.this],
475            )))),
476
477            // ===== Additional auto-generated transforms from Python sqlglot =====
478            // ApproxTopK -> APPROX_TOP_COUNT
479            Expression::ApproxTopK(f) => {
480                let mut args = vec![*f.this];
481                if let Some(expr) = f.expression {
482                    args.push(*expr);
483                }
484                Ok(Expression::Function(Box::new(Function::new(
485                    "APPROX_TOP_COUNT".to_string(),
486                    args,
487                ))))
488            }
489
490            // SafeDivide -> SAFE_DIVIDE
491            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
492                "SAFE_DIVIDE".to_string(),
493                vec![*f.this, *f.expression],
494            )))),
495
496            // JSONKeysAtDepth -> JSON_KEYS
497            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
498                "JSON_KEYS".to_string(),
499                vec![*f.this],
500            )))),
501
502            // JSONValueArray -> JSON_VALUE_ARRAY
503            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
504                "JSON_VALUE_ARRAY".to_string(),
505                vec![*f.this],
506            )))),
507
508            // DateFromParts -> DATE
509            Expression::DateFromParts(f) => {
510                let mut args = Vec::new();
511                if let Some(y) = f.year {
512                    args.push(*y);
513                }
514                if let Some(m) = f.month {
515                    args.push(*m);
516                }
517                if let Some(d) = f.day {
518                    args.push(*d);
519                }
520                Ok(Expression::Function(Box::new(Function::new(
521                    "DATE".to_string(),
522                    args,
523                ))))
524            }
525
526            // SPLIT: BigQuery defaults to comma separator when none provided
527            // SPLIT(foo) -> SPLIT(foo, ',')
528            Expression::Split(f) => {
529                // Check if delimiter is empty or a placeholder - add default comma
530                let delimiter = match &f.delimiter {
531                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.is_empty()) =>
532                    {
533                        let Literal::String(_) = lit.as_ref() else {
534                            unreachable!()
535                        };
536                        Expression::Literal(Box::new(Literal::String(",".to_string())))
537                    }
538                    _ => f.delimiter,
539                };
540                Ok(Expression::Split(Box::new(SplitFunc {
541                    this: f.this,
542                    delimiter,
543                })))
544            }
545
546            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
547            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
548            // Special case: CAST(x AS temporal FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP
549            Expression::Cast(c) => {
550                use crate::expressions::DataType;
551                // Check if casting to JSON - use PARSE_JSON instead
552                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
553                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
554                if is_json {
555                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
556                }
557                // CAST(x AS temporal_type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP(strftime_fmt, x)
558                if c.format.is_some() {
559                    let is_temporal = matches!(
560                        c.to,
561                        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
562                    ) || matches!(&c.to, DataType::Custom { name } if
563                        name.eq_ignore_ascii_case("TIMESTAMP") ||
564                        name.eq_ignore_ascii_case("DATE") ||
565                        name.eq_ignore_ascii_case("DATETIME") ||
566                        name.eq_ignore_ascii_case("TIME")
567                    );
568                    if is_temporal {
569                        let format_expr = c.format.as_ref().unwrap().as_ref();
570                        // Extract the actual format expr and timezone (if AT TIME ZONE is present)
571                        let (actual_format, timezone) = match format_expr {
572                            Expression::AtTimeZone(ref atz) => {
573                                (atz.this.clone(), Some(atz.zone.clone()))
574                            }
575                            _ => (format_expr.clone(), None),
576                        };
577                        let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format);
578                        let func_name = match &c.to {
579                            DataType::Date => "PARSE_DATE",
580                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATE") => {
581                                "PARSE_DATE"
582                            }
583                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME") => {
584                                "PARSE_DATETIME"
585                            }
586                            _ => "PARSE_TIMESTAMP",
587                        };
588                        let mut func_args = vec![strftime_fmt, c.this];
589                        if let Some(tz) = timezone {
590                            func_args.push(tz);
591                        }
592                        return Ok(Expression::Function(Box::new(Function::new(
593                            func_name.to_string(),
594                            func_args,
595                        ))));
596                    }
597                }
598                let transformed_type = match self.transform_data_type(c.to)? {
599                    Expression::DataType(dt) => dt,
600                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
601                };
602                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
603                    this: c.this,
604                    to: transformed_type,
605                    trailing_comments: c.trailing_comments,
606                    double_colon_syntax: c.double_colon_syntax,
607                    format: c.format,
608                    default: c.default,
609                    inferred_type: None,
610                })))
611            }
612
613            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
614            Expression::SafeCast(c) => {
615                let transformed_type = match self.transform_data_type(c.to)? {
616                    Expression::DataType(dt) => dt,
617                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
618                };
619                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
620                    this: c.this,
621                    to: transformed_type,
622                    trailing_comments: c.trailing_comments,
623                    double_colon_syntax: c.double_colon_syntax,
624                    format: c.format,
625                    default: c.default,
626                    inferred_type: None,
627                })))
628            }
629
630            // ===== SELECT-level transforms =====
631            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
632            Expression::Select(mut select) => {
633                if select.group_by.is_some() && select.order_by.is_some() {
634                    // Build map: expression → alias name for aliased projections
635                    let aliases: Vec<(Expression, Identifier)> = select
636                        .expressions
637                        .iter()
638                        .filter_map(|e| {
639                            if let Expression::Alias(a) = e {
640                                Some((a.this.clone(), a.alias.clone()))
641                            } else {
642                                None
643                            }
644                        })
645                        .collect();
646
647                    if let Some(ref mut group_by) = select.group_by {
648                        for grouped in group_by.expressions.iter_mut() {
649                            // Skip numeric indices (already aliased)
650                            if matches!(grouped, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
651                            {
652                                continue;
653                            }
654                            // Check if this GROUP BY expression matches a SELECT alias
655                            for (expr, alias_ident) in &aliases {
656                                if grouped == expr {
657                                    *grouped = Expression::boxed_column(Column {
658                                        name: alias_ident.clone(),
659                                        table: None,
660                                        join_mark: false,
661                                        trailing_comments: Vec::new(),
662                                        span: None,
663                                        inferred_type: None,
664                                    });
665                                    break;
666                                }
667                            }
668                        }
669                    }
670                }
671                Ok(Expression::Select(select))
672            }
673
674            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
675            Expression::ArrayContains(f) => {
676                let array_expr = f.this;
677                let value_expr = f.expression;
678
679                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
680                let unnest = Expression::Unnest(Box::new(UnnestFunc {
681                    this: array_expr,
682                    expressions: Vec::new(),
683                    with_ordinality: false,
684                    alias: None,
685                    offset_alias: None,
686                }));
687                let aliased_unnest = Expression::Alias(Box::new(Alias {
688                    this: unnest,
689                    alias: Identifier::new("_col"),
690                    column_aliases: Vec::new(),
691                    alias_explicit_as: false,
692                    alias_keyword: None,
693                    pre_alias_comments: Vec::new(),
694                    trailing_comments: Vec::new(),
695                    inferred_type: None,
696                }));
697                let col_ref = Expression::boxed_column(Column {
698                    name: Identifier::new("_col"),
699                    table: None,
700                    join_mark: false,
701                    trailing_comments: Vec::new(),
702                    span: None,
703                    inferred_type: None,
704                });
705                let where_clause = Where {
706                    this: Expression::Eq(Box::new(BinaryOp {
707                        left: col_ref,
708                        right: value_expr,
709                        left_comments: Vec::new(),
710                        operator_comments: Vec::new(),
711                        trailing_comments: Vec::new(),
712                        inferred_type: None,
713                    })),
714                };
715                let inner_select = Expression::Select(Box::new(Select {
716                    expressions: vec![Expression::Literal(Box::new(Literal::Number(
717                        "1".to_string(),
718                    )))],
719                    from: Some(From {
720                        expressions: vec![aliased_unnest],
721                    }),
722                    where_clause: Some(where_clause),
723                    ..Default::default()
724                }));
725                Ok(Expression::Exists(Box::new(Exists {
726                    this: inner_select,
727                    not: false,
728                })))
729            }
730
731            // ===== JSON_OBJECT array form → key-value pairs =====
732            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
733            Expression::JsonObject(mut f) => {
734                if f.pairs.len() == 1 {
735                    // Extract expressions from both Array and ArrayFunc variants
736                    let keys_exprs = match &f.pairs[0].0 {
737                        Expression::Array(arr) => Some(&arr.expressions),
738                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
739                        _ => None,
740                    };
741                    let vals_exprs = match &f.pairs[0].1 {
742                        Expression::Array(arr) => Some(&arr.expressions),
743                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
744                        _ => None,
745                    };
746                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
747                        if keys.len() == vals.len() {
748                            let new_pairs: Vec<(Expression, Expression)> = keys
749                                .iter()
750                                .zip(vals.iter())
751                                .map(|(k, v)| (k.clone(), v.clone()))
752                                .collect();
753                            f.pairs = new_pairs;
754                        }
755                    }
756                }
757                Ok(Expression::JsonObject(f))
758            }
759
760            // ===== MOD function: unwrap unnecessary Paren from first argument =====
761            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
762            Expression::ModFunc(mut f) => {
763                // Unwrap Paren from first argument if present
764                if let Expression::Paren(paren) = f.this {
765                    f.this = paren.this;
766                }
767                Ok(Expression::ModFunc(f))
768            }
769
770            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
771            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
772                let path = match *e.expression {
773                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
774                        let Literal::String(s) = lit.as_ref() else {
775                            unreachable!()
776                        };
777                        let normalized = if s.starts_with('$') {
778                            s.clone()
779                        } else if s.starts_with('[') {
780                            format!("${}", s)
781                        } else {
782                            format!("$.{}", s)
783                        };
784                        Expression::Literal(Box::new(Literal::String(normalized)))
785                    }
786                    other => other,
787                };
788                Ok(Expression::Function(Box::new(Function::new(
789                    "JSON_EXTRACT".to_string(),
790                    vec![*e.this, path],
791                ))))
792            }
793
794            // Generic function transformations
795            Expression::Function(f) => self.transform_function(*f),
796
797            // Generic aggregate function transformations
798            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
799
800            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
801            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
802            Expression::MethodCall(mc) => self.transform_method_call(*mc),
803
804            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
805            // and set is_table_function = true for TABLE FUNCTION syntax
806            Expression::CreateFunction(mut cf) => {
807                if let Some(ref mut rtb) = cf.returns_table_body {
808                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
809                        // Convert TABLE (...) to TABLE <...> with BigQuery types
810                        let inner = if rtb.starts_with("TABLE (") {
811                            &rtb["TABLE (".len()..rtb.len() - 1]
812                        } else {
813                            &rtb["TABLE(".len()..rtb.len() - 1]
814                        };
815                        // Convert common types to BigQuery equivalents
816                        let converted = inner
817                            .replace(" INT,", " INT64,")
818                            .replace(" INT)", " INT64)")
819                            .replace(" INTEGER,", " INT64,")
820                            .replace(" INTEGER)", " INT64)")
821                            .replace(" FLOAT,", " FLOAT64,")
822                            .replace(" FLOAT)", " FLOAT64)")
823                            .replace(" BOOLEAN,", " BOOL,")
824                            .replace(" BOOLEAN)", " BOOL)")
825                            .replace(" VARCHAR", " STRING")
826                            .replace(" TEXT", " STRING");
827                        // Handle trailing type (no comma, no paren)
828                        let converted = if converted.ends_with(" INT") {
829                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
830                        } else {
831                            converted
832                        };
833                        *rtb = format!("TABLE <{}>", converted);
834                        cf.is_table_function = true;
835                    }
836                }
837                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
838                if cf.is_table_function {
839                    if let Some(ref body) = cf.body {
840                        if matches!(body, FunctionBody::StringLiteral(_)) {
841                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
842                                // Parse the SQL string into an expression
843                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
844                                    if let Some(stmt) = parsed.into_iter().next() {
845                                        cf.body = Some(FunctionBody::Expression(stmt));
846                                    } else {
847                                        cf.body = Some(FunctionBody::StringLiteral(sql));
848                                    }
849                                } else {
850                                    cf.body = Some(FunctionBody::StringLiteral(sql));
851                                }
852                            }
853                        }
854                    }
855                }
856                Ok(Expression::CreateFunction(cf))
857            }
858
859            // Pass through everything else
860            _ => Ok(expr),
861        }
862    }
863}
864
865#[cfg(feature = "transpile")]
866impl BigQueryDialect {
867    /// Transform data types according to BigQuery TYPE_MAPPING
868    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
869        use crate::expressions::DataType;
870        let transformed = match dt {
871            // BIGINT -> INT64
872            DataType::BigInt { .. } => DataType::Custom {
873                name: "INT64".to_string(),
874            },
875            // INT -> INT64
876            DataType::Int { .. } => DataType::Custom {
877                name: "INT64".to_string(),
878            },
879            // SMALLINT -> INT64
880            DataType::SmallInt { .. } => DataType::Custom {
881                name: "INT64".to_string(),
882            },
883            // TINYINT -> INT64
884            DataType::TinyInt { .. } => DataType::Custom {
885                name: "INT64".to_string(),
886            },
887            // FLOAT -> FLOAT64
888            DataType::Float { .. } => DataType::Custom {
889                name: "FLOAT64".to_string(),
890            },
891            // DOUBLE -> FLOAT64
892            DataType::Double { .. } => DataType::Custom {
893                name: "FLOAT64".to_string(),
894            },
895            // BOOLEAN -> BOOL
896            DataType::Boolean => DataType::Custom {
897                name: "BOOL".to_string(),
898            },
899            // CHAR -> STRING
900            DataType::Char { .. } => DataType::Custom {
901                name: "STRING".to_string(),
902            },
903            // VARCHAR -> STRING
904            DataType::VarChar { .. } => DataType::Custom {
905                name: "STRING".to_string(),
906            },
907            // TEXT -> STRING
908            DataType::Text => DataType::Custom {
909                name: "STRING".to_string(),
910            },
911            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
912            DataType::String { .. } => DataType::Custom {
913                name: "STRING".to_string(),
914            },
915            // BINARY -> BYTES
916            DataType::Binary { .. } => DataType::Custom {
917                name: "BYTES".to_string(),
918            },
919            // VARBINARY -> BYTES
920            DataType::VarBinary { .. } => DataType::Custom {
921                name: "BYTES".to_string(),
922            },
923            // BLOB -> BYTES
924            DataType::Blob => DataType::Custom {
925                name: "BYTES".to_string(),
926            },
927            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
928            DataType::Decimal { .. } => DataType::Custom {
929                name: "NUMERIC".to_string(),
930            },
931            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
932            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
933            // be converted to DATETIME by the generator
934            DataType::Timestamp {
935                timezone: false, ..
936            } => DataType::Custom {
937                name: "TIMESTAMP".to_string(),
938            },
939            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
940                name: "TIMESTAMP".to_string(),
941            },
942            // UUID -> STRING (BigQuery doesn't have native UUID type)
943            DataType::Uuid => DataType::Custom {
944                name: "STRING".to_string(),
945            },
946            // RECORD -> STRUCT in BigQuery
947            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
948                DataType::Custom {
949                    name: "STRUCT".to_string(),
950                }
951            }
952            // TIMESTAMPTZ (custom) -> TIMESTAMP
953            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
954                DataType::Custom {
955                    name: "TIMESTAMP".to_string(),
956                }
957            }
958            // BYTEINT (custom) -> INT64
959            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
960                DataType::Custom {
961                    name: "INT64".to_string(),
962                }
963            }
964            // Keep all other types as-is
965            other => other,
966        };
967        Ok(Expression::DataType(transformed))
968    }
969
970    fn transform_function(&self, f: Function) -> Result<Expression> {
971        let name_upper = f.name.to_uppercase();
972        match name_upper.as_str() {
973            // IFNULL -> COALESCE (both work in BigQuery)
974            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
975                original_name: None,
976                expressions: f.args,
977                inferred_type: None,
978            }))),
979
980            // NVL -> COALESCE
981            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
982                original_name: None,
983                expressions: f.args,
984                inferred_type: None,
985            }))),
986
987            // ISNULL -> COALESCE
988            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
989                original_name: None,
990                expressions: f.args,
991                inferred_type: None,
992            }))),
993
994            // GROUP_CONCAT -> STRING_AGG in BigQuery
995            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
996                Function::new("STRING_AGG".to_string(), f.args),
997            ))),
998
999            // SUBSTR -> SUBSTRING (both work)
1000            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1001                "SUBSTRING".to_string(),
1002                f.args,
1003            )))),
1004
1005            // RANDOM -> RAND
1006            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
1007                seed: None,
1008                lower: None,
1009                upper: None,
1010            }))),
1011
1012            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
1013            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
1014            "CURRENT_DATE" if f.args.is_empty() => {
1015                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
1016            }
1017            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
1018                name: "CURRENT_DATE".to_string(),
1019                args: f.args,
1020                distinct: false,
1021                trailing_comments: Vec::new(),
1022                use_bracket_syntax: false,
1023                no_parens: false,
1024                quoted: false,
1025                span: None,
1026                inferred_type: None,
1027            }))),
1028
1029            // NOW -> CURRENT_TIMESTAMP in BigQuery
1030            "NOW" => Ok(Expression::CurrentTimestamp(
1031                crate::expressions::CurrentTimestamp {
1032                    precision: None,
1033                    sysdate: false,
1034                },
1035            )),
1036
1037            // TO_DATE -> PARSE_DATE in BigQuery
1038            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
1039                "PARSE_DATE".to_string(),
1040                f.args,
1041            )))),
1042
1043            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
1044            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1045                "PARSE_TIMESTAMP".to_string(),
1046                f.args,
1047            )))),
1048
1049            // TO_TIME -> TIME in BigQuery
1050            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1051                "TIME".to_string(),
1052                f.args,
1053            )))),
1054
1055            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
1056            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
1057                "FORMAT_DATE".to_string(),
1058                f.args,
1059            )))),
1060
1061            // POSITION -> STRPOS in BigQuery
1062            // BigQuery uses STRPOS(string, substring)
1063            "POSITION" if f.args.len() == 2 => {
1064                let mut args = f.args;
1065                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1066                let first = args.remove(0);
1067                let second = args.remove(0);
1068                Ok(Expression::Function(Box::new(Function::new(
1069                    "STRPOS".to_string(),
1070                    vec![second, first],
1071                ))))
1072            }
1073
1074            // LEN -> LENGTH
1075            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1076                f.args.into_iter().next().unwrap(),
1077            )))),
1078
1079            // CEILING -> CEIL (both work)
1080            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1081                this: f.args.into_iter().next().unwrap(),
1082                decimals: None,
1083                to: None,
1084            }))),
1085
1086            // GETDATE -> CURRENT_TIMESTAMP
1087            "GETDATE" => Ok(Expression::CurrentTimestamp(
1088                crate::expressions::CurrentTimestamp {
1089                    precision: None,
1090                    sysdate: false,
1091                },
1092            )),
1093
1094            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1095            // CARDINALITY -> ARRAY_LENGTH
1096            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1097                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1098            ))),
1099
1100            // UNNEST is native to BigQuery
1101
1102            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1103            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1104                "GENERATE_ARRAY".to_string(),
1105                f.args,
1106            )))),
1107
1108            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1109            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1110            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1111                "APPROX_COUNT_DISTINCT".to_string(),
1112                f.args,
1113            )))),
1114
1115            // COUNT_IF -> COUNTIF in BigQuery
1116            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1117                "COUNTIF".to_string(),
1118                f.args,
1119            )))),
1120
1121            // SHA1 -> SHA1 (native), SHA -> SHA1
1122            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1123                "SHA1".to_string(),
1124                f.args,
1125            )))),
1126
1127            // SHA256/SHA2 -> SHA256
1128            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1129                "SHA256".to_string(),
1130                f.args,
1131            )))),
1132
1133            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1134            // TO_HEX(MD5(x)) pattern
1135            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1136                "MD5".to_string(),
1137                f.args,
1138            )))),
1139
1140            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1141            // VAR_POP -> VAR_POP (native)
1142
1143            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1144            "DATEADD" if f.args.len() == 3 => {
1145                let mut args = f.args;
1146                let unit_expr = args.remove(0);
1147                let amount = args.remove(0);
1148                let date = args.remove(0);
1149                // Convert unit identifier to IntervalUnit
1150                let unit_name = match &unit_expr {
1151                    Expression::Identifier(id) => id.name.to_uppercase(),
1152                    Expression::Var(v) => v.this.to_uppercase(),
1153                    Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1154                    _ => "DAY".to_string(),
1155                };
1156                let unit = match unit_name.as_str() {
1157                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1158                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1159                        crate::expressions::IntervalUnit::Quarter
1160                    }
1161                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1162                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1163                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1164                        crate::expressions::IntervalUnit::Day
1165                    }
1166                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1167                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1168                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1169                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1170                        crate::expressions::IntervalUnit::Millisecond
1171                    }
1172                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1173                        crate::expressions::IntervalUnit::Microsecond
1174                    }
1175                    _ => crate::expressions::IntervalUnit::Day,
1176                };
1177                Ok(Expression::DateAdd(Box::new(
1178                    crate::expressions::DateAddFunc {
1179                        this: date,
1180                        interval: amount,
1181                        unit,
1182                    },
1183                )))
1184            }
1185            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1186                "DATE_ADD".to_string(),
1187                f.args,
1188            )))),
1189
1190            // DATE_DIFF in BigQuery (native)
1191            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1192                "DATE_DIFF".to_string(),
1193                f.args,
1194            )))),
1195
1196            // TIMESTAMP_DIFF in BigQuery
1197            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1198                "TIMESTAMP_DIFF".to_string(),
1199                f.args,
1200            )))),
1201
1202            // TIME -> TIME (native)
1203            // DATETIME -> DATETIME (native)
1204
1205            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1206
1207            // NEWID/UUID -> GENERATE_UUID
1208            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1209                "GENERATE_UUID".to_string(),
1210                vec![],
1211            )))),
1212
1213            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1214            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1215                "EDIT_DISTANCE".to_string(),
1216                f.args,
1217            )))),
1218
1219            // UNIX_TIMESTAMP -> UNIX_SECONDS
1220            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1221                "UNIX_SECONDS".to_string(),
1222                f.args,
1223            )))),
1224
1225            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1226            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1227                "TIMESTAMP_SECONDS".to_string(),
1228                f.args,
1229            )))),
1230
1231            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1232            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1233                Function::new("LENGTH".to_string(), f.args),
1234            ))),
1235
1236            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1237            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1238                "BYTE_LENGTH".to_string(),
1239                f.args,
1240            )))),
1241
1242            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1243            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1244                "JSON_VALUE_ARRAY".to_string(),
1245                f.args,
1246            )))),
1247
1248            // INSTR is native to BigQuery
1249
1250            // SPLIT: BigQuery defaults to comma separator when none provided
1251            // SPLIT(foo) -> SPLIT(foo, ',')
1252            "SPLIT" if f.args.len() == 1 => {
1253                let mut args = f.args;
1254                args.push(Expression::Literal(Box::new(Literal::String(
1255                    ",".to_string(),
1256                ))));
1257                Ok(Expression::Split(Box::new(SplitFunc {
1258                    this: args.remove(0),
1259                    delimiter: args.remove(0),
1260                })))
1261            }
1262
1263            // SPLIT with two args - convert to Split expression
1264            "SPLIT" if f.args.len() == 2 => {
1265                let mut args = f.args;
1266                Ok(Expression::Split(Box::new(SplitFunc {
1267                    this: args.remove(0),
1268                    delimiter: args.remove(0),
1269                })))
1270            }
1271
1272            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1273            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1274                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1275                let args = if f.args.len() > 4 {
1276                    f.args[..4].to_vec()
1277                } else {
1278                    f.args
1279                };
1280                Ok(Expression::Function(Box::new(Function::new(
1281                    "REGEXP_EXTRACT".to_string(),
1282                    args,
1283                ))))
1284            }
1285            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1286                "REGEXP_EXTRACT".to_string(),
1287                f.args,
1288            )))),
1289
1290            // REGEXP_REPLACE - strip extra Snowflake-specific args
1291            "REGEXP_REPLACE" if f.args.len() > 3 => {
1292                let args = f.args[..3].to_vec();
1293                Ok(Expression::Function(Box::new(Function::new(
1294                    "REGEXP_REPLACE".to_string(),
1295                    args,
1296                ))))
1297            }
1298
1299            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1300            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1301                "JSON_OBJECT".to_string(),
1302                f.args,
1303            )))),
1304
1305            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1306            "EDITDISTANCE" if f.args.len() == 3 => {
1307                let col1 = f.args[0].clone();
1308                let col2 = f.args[1].clone();
1309                let max_dist = f.args[2].clone();
1310                Ok(Expression::Function(Box::new(Function::new(
1311                    "EDIT_DISTANCE".to_string(),
1312                    vec![
1313                        col1,
1314                        col2,
1315                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1316                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1317                            value: max_dist,
1318                            separator: crate::expressions::NamedArgSeparator::DArrow,
1319                        })),
1320                    ],
1321                ))))
1322            }
1323            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1324                Function::new("EDIT_DISTANCE".to_string(), f.args),
1325            ))),
1326
1327            // HEX_DECODE_BINARY -> FROM_HEX
1328            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1329                "FROM_HEX".to_string(),
1330                f.args,
1331            )))),
1332
1333            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1334            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1335            "PARSE_DATE"
1336            | "PARSE_DATETIME"
1337            | "PARSE_TIMESTAMP"
1338            | "SAFE.PARSE_DATE"
1339            | "SAFE.PARSE_DATETIME"
1340            | "SAFE.PARSE_TIMESTAMP" => {
1341                let args = self.normalize_time_format_args(f.args);
1342                Ok(Expression::Function(Box::new(Function {
1343                    name: f.name,
1344                    args,
1345                    distinct: f.distinct,
1346                    no_parens: f.no_parens,
1347                    trailing_comments: f.trailing_comments,
1348                    quoted: f.quoted,
1349                    use_bracket_syntax: f.use_bracket_syntax,
1350                    span: None,
1351                    inferred_type: None,
1352                })))
1353            }
1354
1355            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1356            "GET_PATH" if f.args.len() == 2 => {
1357                let mut args = f.args;
1358                let this = args.remove(0);
1359                let path = args.remove(0);
1360                let json_path = match &path {
1361                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1362                        let Literal::String(s) = lit.as_ref() else {
1363                            unreachable!()
1364                        };
1365                        let normalized = if s.starts_with('$') {
1366                            s.clone()
1367                        } else if s.starts_with('[') {
1368                            format!("${}", s)
1369                        } else {
1370                            format!("$.{}", s)
1371                        };
1372                        Expression::Literal(Box::new(Literal::String(normalized)))
1373                    }
1374                    _ => path,
1375                };
1376                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1377                    this,
1378                    path: json_path,
1379                    returning: None,
1380                    arrow_syntax: false,
1381                    hash_arrow_syntax: false,
1382                    wrapper_option: None,
1383                    quotes_option: None,
1384                    on_scalar_string: false,
1385                    on_error: None,
1386                })))
1387            }
1388
1389            // Pass through everything else
1390            _ => Ok(Expression::Function(Box::new(f))),
1391        }
1392    }
1393
1394    fn transform_aggregate_function(
1395        &self,
1396        f: Box<crate::expressions::AggregateFunction>,
1397    ) -> Result<Expression> {
1398        let name_upper = f.name.to_uppercase();
1399        match name_upper.as_str() {
1400            // GROUP_CONCAT -> STRING_AGG
1401            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1402                Function::new("STRING_AGG".to_string(), f.args),
1403            ))),
1404
1405            // Pass through everything else
1406            _ => Ok(Expression::AggregateFunction(f)),
1407        }
1408    }
1409
1410    /// Transform MethodCall expressions
1411    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1412    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1413        use crate::expressions::MethodCall;
1414
1415        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1416        if let Expression::Column(ref col) = mc.this {
1417            if col.name.name.eq_ignore_ascii_case("SAFE") {
1418                let method_upper = mc.method.name.to_uppercase();
1419                if method_upper == "PARSE_DATE"
1420                    || method_upper == "PARSE_DATETIME"
1421                    || method_upper == "PARSE_TIMESTAMP"
1422                {
1423                    // Normalize the format string in the first argument
1424                    let args = self.normalize_time_format_args(mc.args);
1425                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1426                        this: mc.this,
1427                        method: mc.method,
1428                        args,
1429                    })));
1430                }
1431            }
1432        }
1433
1434        // Pass through all other method calls
1435        Ok(Expression::MethodCall(Box::new(mc)))
1436    }
1437
1438    /// Normalize time format strings in function arguments
1439    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1440    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1441        args.into_iter()
1442            .enumerate()
1443            .map(|(i, arg)| {
1444                // Only transform the first argument (the format string)
1445                if i == 0 {
1446                    if let Expression::Literal(ref lit) = arg {
1447                        if let Literal::String(s) = lit.as_ref() {
1448                            let normalized = self.normalize_time_format(&s);
1449                            return Expression::Literal(Box::new(Literal::String(normalized)));
1450                        }
1451                    }
1452                }
1453                arg
1454            })
1455            .collect()
1456    }
1457
1458    /// Normalize a time format string according to BigQuery conventions
1459    /// %Y-%m-%d -> %F (ISO date)
1460    /// %H:%M:%S -> %T (time)
1461    fn normalize_time_format(&self, format: &str) -> String {
1462        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1463    }
1464
1465    /// Convert BigQuery CAST FORMAT elements to strftime equivalents,
1466    /// then normalize BigQuery shorthand forms (%Y-%m-%d -> %F, %H:%M:%S -> %T)
1467    fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
1468        use crate::expressions::Literal;
1469        if let Expression::Literal(lit) = format_expr {
1470            if let Literal::String(s) = lit.as_ref() {
1471                let result = s
1472                    .replace("YYYYMMDD", "%Y%m%d")
1473                    .replace("YYYY", "%Y")
1474                    .replace("YY", "%y")
1475                    .replace("MONTH", "%B")
1476                    .replace("MON", "%b")
1477                    .replace("MM", "%m")
1478                    .replace("DD", "%d")
1479                    .replace("HH24", "%H")
1480                    .replace("HH12", "%I")
1481                    .replace("HH", "%I")
1482                    .replace("MI", "%M")
1483                    .replace("SSTZH", "%S%z")
1484                    .replace("SS", "%S")
1485                    .replace("TZH", "%z");
1486                // Normalize: %Y-%m-%d -> %F, %H:%M:%S -> %T
1487                let normalized = result.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T");
1488                return Expression::Literal(Box::new(Literal::String(normalized)));
1489            }
1490        }
1491        format_expr.clone()
1492    }
1493}
1494
1495#[cfg(test)]
1496mod tests {
1497    use super::*;
1498    use crate::dialects::Dialect;
1499    use crate::parse_one;
1500
1501    fn transpile_to_bigquery(sql: &str) -> String {
1502        let dialect = Dialect::get(DialectType::Generic);
1503        let result = dialect
1504            .transpile(sql, DialectType::BigQuery)
1505            .expect("Transpile failed");
1506        result[0].clone()
1507    }
1508
1509    #[test]
1510    fn test_ifnull_identity() {
1511        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1512        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1513        assert!(
1514            result.contains("COALESCE"),
1515            "Expected COALESCE, got: {}",
1516            result
1517        );
1518    }
1519
1520    #[test]
1521    fn test_nvl_to_ifnull() {
1522        // NVL is converted to IFNULL in BigQuery
1523        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1524        assert!(
1525            result.contains("IFNULL"),
1526            "Expected IFNULL, got: {}",
1527            result
1528        );
1529    }
1530
1531    #[test]
1532    fn test_try_cast_to_safe_cast() {
1533        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1534        assert!(
1535            result.contains("SAFE_CAST"),
1536            "Expected SAFE_CAST, got: {}",
1537            result
1538        );
1539    }
1540
1541    #[test]
1542    fn test_random_to_rand() {
1543        let result = transpile_to_bigquery("SELECT RANDOM()");
1544        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1545    }
1546
1547    #[test]
1548    fn test_basic_select() {
1549        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1550        assert!(result.contains("SELECT"));
1551        assert!(result.contains("FROM users"));
1552    }
1553
1554    #[test]
1555    fn test_group_concat_to_string_agg() {
1556        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1557        assert!(
1558            result.contains("STRING_AGG"),
1559            "Expected STRING_AGG, got: {}",
1560            result
1561        );
1562    }
1563
1564    #[test]
1565    fn test_generate_series_to_generate_array() {
1566        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1567        assert!(
1568            result.contains("GENERATE_ARRAY"),
1569            "Expected GENERATE_ARRAY, got: {}",
1570            result
1571        );
1572    }
1573
1574    #[test]
1575    fn test_backtick_identifiers() {
1576        // BigQuery uses backticks for identifiers
1577        let dialect = BigQueryDialect;
1578        let config = dialect.generator_config();
1579        assert_eq!(config.identifier_quote, '`');
1580    }
1581
1582    fn bigquery_identity(sql: &str, expected: &str) {
1583        let dialect = Dialect::get(DialectType::BigQuery);
1584        let ast = dialect.parse(sql).expect("Parse failed");
1585        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1586        let result = dialect.generate(&transformed).expect("Generate failed");
1587        assert_eq!(result, expected, "SQL: {}", sql);
1588    }
1589
1590    #[test]
1591    fn test_cast_char_to_string() {
1592        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1593    }
1594
1595    #[test]
1596    fn test_cast_varchar_to_string() {
1597        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1598    }
1599
1600    #[test]
1601    fn test_cast_nchar_to_string() {
1602        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1603    }
1604
1605    #[test]
1606    fn test_cast_nvarchar_to_string() {
1607        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1608    }
1609
1610    #[test]
1611    fn test_cast_timestamptz_to_timestamp() {
1612        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1613    }
1614
1615    #[test]
1616    fn test_cast_record_to_struct() {
1617        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1618    }
1619
1620    #[test]
1621    fn test_json_literal_to_parse_json() {
1622        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1623        bigquery_identity(
1624            "SELECT JSON '\"foo\"' AS json_data",
1625            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1626        );
1627    }
1628
1629    #[test]
1630    fn test_grant_as_alias_not_quoted() {
1631        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1632        bigquery_identity(
1633            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1634            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1635        );
1636    }
1637
1638    #[test]
1639    fn test_timestamp_literal_to_cast() {
1640        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1641        bigquery_identity(
1642            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1643            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1644        );
1645    }
1646
1647    #[test]
1648    fn test_date_literal_to_cast_in_extract() {
1649        // Issue 1: DATE literal should become CAST syntax in BigQuery
1650        bigquery_identity(
1651            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1652            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1653        );
1654    }
1655
1656    #[test]
1657    fn test_json_object_with_json_literals() {
1658        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1659        bigquery_identity(
1660            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1661            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1662        );
1663    }
1664
1665    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1666    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1667    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1668
1669    #[test]
1670    fn test_safe_parse_date_format_normalization() {
1671        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1672        bigquery_identity(
1673            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1674            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1675        );
1676    }
1677
1678    #[test]
1679    fn test_safe_parse_datetime_format_normalization() {
1680        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1681        bigquery_identity(
1682            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1683            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1684        );
1685    }
1686
1687    #[test]
1688    fn test_safe_parse_timestamp_format_normalization() {
1689        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1690        bigquery_identity(
1691            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1692            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1693        );
1694    }
1695
1696    #[test]
1697    fn test_datetime_literal_to_cast() {
1698        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1699        bigquery_identity(
1700            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1701            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1702        );
1703    }
1704
1705    #[test]
1706    fn test_last_day_week_modifier_stripped() {
1707        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1708        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1709    }
1710
1711    #[test]
1712    fn test_hash_line_comment_parses() {
1713        // Regression test for issue #38:
1714        // BigQuery should accept # as a single-line comment.
1715        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1716        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1717    }
1718}