Skip to main content

polyglot_sql/dialects/
bigquery.rs

1//! BigQuery Dialect
2//!
3//! BigQuery-specific transformations based on sqlglot patterns.
4//! Key differences:
5//! - Uses backticks for identifiers (especially for project.dataset.table)
6//! - SAFE_ prefix for safe operations
7//! - Different date/time function names (DATE_DIFF, FORMAT_DATE, PARSE_DATE)
8//! - STRUCT and ARRAY syntax differences
9//! - No ILIKE support
10//! - QUALIFY clause support
11
12use super::{DialectImpl, DialectType};
13use crate::error::Result;
14use crate::expressions::{
15    Alias, BinaryOp, CeilFunc, Column, Exists, Expression, From, Function, FunctionBody,
16    Identifier, JsonExtractFunc, LikeOp, Literal, Select, SplitFunc, StringAggFunc, UnaryFunc,
17    UnnestFunc, VarArgFunc, Where,
18};
19#[cfg(feature = "generate")]
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// BigQuery dialect
24pub struct BigQueryDialect;
25
26impl DialectImpl for BigQueryDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::BigQuery
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        let mut config = TokenizerConfig::default();
33        // BigQuery uses backticks for identifiers, NOT double quotes
34        // Remove double quote from identifiers (it's in the default config)
35        config.identifiers.remove(&'"');
36        config.identifiers.insert('`', '`');
37        // BigQuery supports double quotes for strings (in addition to single quotes)
38        config.quotes.insert("\"".to_string(), "\"".to_string());
39        // BigQuery supports triple-quoted strings
40        config.quotes.insert("'''".to_string(), "'''".to_string());
41        config
42            .quotes
43            .insert("\"\"\"".to_string(), "\"\"\"".to_string());
44        // BigQuery supports backslash escaping in strings
45        config.string_escapes = vec!['\'', '\\'];
46        // In BigQuery, b'...' is a byte string (bytes), not a bit string (binary digits)
47        config.b_prefix_is_byte_string = true;
48        // BigQuery supports hex number strings like 0xA, 0xFF
49        config.hex_number_strings = true;
50        // BigQuery: 0xA represents integer 10 (not binary/blob)
51        config.hex_string_is_integer_type = true;
52        // BigQuery supports # as single-line comments.
53        config.hash_comments = true;
54        config
55    }
56
57    #[cfg(feature = "generate")]
58
59    fn generator_config(&self) -> GeneratorConfig {
60        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
61        GeneratorConfig {
62            identifier_quote: '`',
63            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
64            dialect: Some(DialectType::BigQuery),
65            // BigQuery doesn't normalize function names (Python: NORMALIZE_FUNCTIONS = False)
66            normalize_functions: NormalizeFunctions::None,
67            // BigQuery-specific settings from Python sqlglot
68            interval_allows_plural_form: false,
69            join_hints: false,
70            query_hints: false,
71            table_hints: false,
72            limit_fetch_style: crate::generator::LimitFetchStyle::Limit,
73            rename_table_with_db: false,
74            nvl2_supported: false,
75            unnest_with_ordinality: false,
76            collate_is_func: true,
77            limit_only_literals: true,
78            supports_table_alias_columns: false,
79            unpivot_aliases_are_identifiers: false,
80            json_key_value_pair_sep: ",",
81            null_ordering_supported: false,
82            ignore_nulls_in_func: true,
83            json_path_single_quote_escape: true,
84            can_implement_array_any: true,
85            supports_to_number: false,
86            named_placeholder_token: "@",
87            hex_func: "TO_HEX",
88            with_properties_prefix: "OPTIONS",
89            supports_exploding_projections: false,
90            except_intersect_support_all_clause: false,
91            supports_unix_seconds: true,
92            // BigQuery uses SAFE_ prefix for safe operations
93            try_supported: true,
94            // BigQuery does not support SEMI/ANTI JOIN syntax
95            semi_anti_join_with_side: false,
96            ..Default::default()
97        }
98    }
99
100    #[cfg(feature = "transpile")]
101
102    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
103        match expr {
104            // ===== Data Type Mappings =====
105            Expression::DataType(dt) => self.transform_data_type(dt),
106
107            Expression::Table(mut table)
108                if table.catalog.is_none()
109                    && table.alias.is_none()
110                    && table.schema.as_ref().map_or(false, |schema| {
111                        schema.quoted && schema.name.contains("INFORMATION_SCHEMA")
112                    }) =>
113            {
114                let schema = table.schema.take().expect("schema checked above");
115                let old_name = table.name.clone();
116                let alias = old_name.name.clone();
117                table.name = Identifier {
118                    name: format!("{}.{}", schema.name, old_name.name),
119                    quoted: true,
120                    trailing_comments: old_name.trailing_comments,
121                    span: old_name.span,
122                };
123                table.alias = Some(Identifier::new(alias));
124                Ok(Expression::Table(table))
125            }
126
127            // ===== Null handling =====
128            // IFNULL is native to BigQuery - keep as-is for identity
129            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
130
131            // NVL -> IFNULL in BigQuery (BigQuery uses IFNULL, not NVL)
132            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
133
134            // Coalesce stays as Coalesce
135            Expression::Coalesce(f) => Ok(Expression::Coalesce(f)),
136
137            // ===== String aggregation =====
138            // GROUP_CONCAT -> STRING_AGG in BigQuery
139            Expression::GroupConcat(f) => Ok(Expression::StringAgg(Box::new(StringAggFunc {
140                this: f.this,
141                separator: f.separator,
142                order_by: f.order_by,
143                distinct: f.distinct,
144                filter: f.filter,
145                limit: None,
146                inferred_type: None,
147            }))),
148
149            // ===== Cast operations =====
150            // Cast data types are transformed by transform_recursive in mod.rs
151            // which calls transform_data_type via transform_expr(Expression::DataType(...))
152
153            // TryCast -> SafeCast in BigQuery with type transformation
154            Expression::TryCast(c) => {
155                let transformed_type = match self.transform_data_type(c.to)? {
156                    Expression::DataType(dt) => dt,
157                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
158                };
159                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
160                    this: c.this,
161                    to: transformed_type,
162                    trailing_comments: c.trailing_comments,
163                    double_colon_syntax: c.double_colon_syntax,
164                    format: c.format,
165                    default: c.default,
166                    inferred_type: None,
167                })))
168            }
169
170            // ===== Pattern matching =====
171            // ILIKE -> LOWER() LIKE LOWER() in BigQuery (no ILIKE support)
172            Expression::ILike(op) => {
173                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
174                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
175                Ok(Expression::Like(Box::new(LikeOp {
176                    left: lower_left,
177                    right: lower_right,
178                    escape: op.escape,
179                    quantifier: op.quantifier,
180                    inferred_type: None,
181                })))
182            }
183
184            // RegexpLike -> REGEXP_CONTAINS in BigQuery
185            Expression::RegexpLike(f) => Ok(Expression::Function(Box::new(Function::new(
186                "REGEXP_CONTAINS".to_string(),
187                vec![f.this, f.pattern],
188            )))),
189
190            // ===== Array operations =====
191            // EXPLODE -> UNNEST in BigQuery
192            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
193                crate::expressions::UnnestFunc {
194                    this: f.this,
195                    expressions: Vec::new(),
196                    with_ordinality: false,
197                    alias: None,
198                    offset_alias: None,
199                },
200            ))),
201
202            // ExplodeOuter -> UNNEST with LEFT JOIN semantics
203            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
204                crate::expressions::UnnestFunc {
205                    this: f.this,
206                    expressions: Vec::new(),
207                    with_ordinality: false,
208                    alias: None,
209                    offset_alias: None,
210                },
211            ))),
212
213            // GenerateSeries -> GENERATE_ARRAY in BigQuery
214            Expression::GenerateSeries(f) => {
215                let mut args = Vec::new();
216                if let Some(start) = f.start {
217                    args.push(*start);
218                }
219                if let Some(end) = f.end {
220                    args.push(*end);
221                }
222                if let Some(step) = f.step {
223                    args.push(*step);
224                }
225                Ok(Expression::Function(Box::new(Function::new(
226                    "GENERATE_ARRAY".to_string(),
227                    args,
228                ))))
229            }
230
231            // ===== Bitwise operations =====
232            // BitwiseAndAgg -> BIT_AND
233            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
234                "BIT_AND".to_string(),
235                vec![f.this],
236            )))),
237
238            // BitwiseOrAgg -> BIT_OR
239            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
240                "BIT_OR".to_string(),
241                vec![f.this],
242            )))),
243
244            // BitwiseXorAgg -> BIT_XOR
245            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
246                "BIT_XOR".to_string(),
247                vec![f.this],
248            )))),
249
250            // BitwiseCount -> BIT_COUNT
251            Expression::BitwiseCount(f) => Ok(Expression::Function(Box::new(Function::new(
252                "BIT_COUNT".to_string(),
253                vec![f.this],
254            )))),
255
256            // ByteLength -> BYTE_LENGTH
257            Expression::ByteLength(f) => Ok(Expression::Function(Box::new(Function::new(
258                "BYTE_LENGTH".to_string(),
259                vec![f.this],
260            )))),
261
262            // IntDiv -> DIV
263            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
264                "DIV".to_string(),
265                vec![f.this, f.expression],
266            )))),
267
268            // Int64 -> INT64
269            Expression::Int64(f) => Ok(Expression::Function(Box::new(Function::new(
270                "INT64".to_string(),
271                vec![f.this],
272            )))),
273
274            // ===== Random =====
275            // RANDOM -> RAND in BigQuery
276            Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
277                seed: None,
278                lower: None,
279                upper: None,
280            }))),
281
282            // ===== UUID =====
283            // Uuid -> GENERATE_UUID in BigQuery
284            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
285                "GENERATE_UUID".to_string(),
286                vec![],
287            )))),
288
289            // ===== Approximate functions =====
290            // ApproxDistinct -> APPROX_COUNT_DISTINCT
291            Expression::ApproxDistinct(f) => Ok(Expression::Function(Box::new(Function::new(
292                "APPROX_COUNT_DISTINCT".to_string(),
293                vec![f.this],
294            )))),
295
296            // ArgMax -> MAX_BY in BigQuery
297            Expression::ArgMax(f) => Ok(Expression::Function(Box::new(Function::new(
298                "MAX_BY".to_string(),
299                vec![*f.this, *f.expression],
300            )))),
301
302            // ArgMin -> MIN_BY in BigQuery
303            Expression::ArgMin(f) => Ok(Expression::Function(Box::new(Function::new(
304                "MIN_BY".to_string(),
305                vec![*f.this, *f.expression],
306            )))),
307
308            // ===== Conditional =====
309            // CountIf -> COUNTIF in BigQuery
310            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
311                "COUNTIF".to_string(),
312                vec![f.this],
313            )))),
314
315            // ===== String functions =====
316            // StringAgg -> STRING_AGG in BigQuery - keep as-is to preserve ORDER BY
317            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
318
319            // ===== Conversion =====
320            // Unhex -> FROM_HEX
321            Expression::Unhex(f) => Ok(Expression::Function(Box::new(Function::new(
322                "FROM_HEX".to_string(),
323                vec![*f.this],
324            )))),
325
326            // UnixToTime -> TIMESTAMP_SECONDS/MILLIS/MICROS based on scale
327            Expression::UnixToTime(f) => {
328                let scale = f.scale.unwrap_or(0);
329                match scale {
330                    0 => Ok(Expression::Function(Box::new(Function::new(
331                        "TIMESTAMP_SECONDS".to_string(),
332                        vec![*f.this],
333                    )))),
334                    3 => Ok(Expression::Function(Box::new(Function::new(
335                        "TIMESTAMP_MILLIS".to_string(),
336                        vec![*f.this],
337                    )))),
338                    6 => Ok(Expression::Function(Box::new(Function::new(
339                        "TIMESTAMP_MICROS".to_string(),
340                        vec![*f.this],
341                    )))),
342                    _ => {
343                        // TIMESTAMP_SECONDS(CAST(value / POWER(10, scale) AS INT64))
344                        let div_expr =
345                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
346                                *f.this,
347                                Expression::Function(Box::new(Function::new(
348                                    "POWER".to_string(),
349                                    vec![Expression::number(10), Expression::number(scale)],
350                                ))),
351                            )));
352                        let cast_expr = Expression::Cast(Box::new(crate::expressions::Cast {
353                            this: div_expr,
354                            to: crate::expressions::DataType::Custom {
355                                name: "INT64".to_string(),
356                            },
357                            double_colon_syntax: false,
358                            trailing_comments: vec![],
359                            format: None,
360                            default: None,
361                            inferred_type: None,
362                        }));
363                        Ok(Expression::Function(Box::new(Function::new(
364                            "TIMESTAMP_SECONDS".to_string(),
365                            vec![cast_expr],
366                        ))))
367                    }
368                }
369            }
370
371            // ===== Date/time =====
372            // DateDiff -> DATE_DIFF in BigQuery
373            Expression::DateDiff(f) => {
374                // BigQuery: DATE_DIFF(date1, date2, part)
375                let unit_str = match f.unit {
376                    Some(crate::expressions::IntervalUnit::Year) => "YEAR",
377                    Some(crate::expressions::IntervalUnit::Quarter) => "QUARTER",
378                    Some(crate::expressions::IntervalUnit::Month) => "MONTH",
379                    Some(crate::expressions::IntervalUnit::Week) => "WEEK",
380                    Some(crate::expressions::IntervalUnit::Day) => "DAY",
381                    Some(crate::expressions::IntervalUnit::Hour) => "HOUR",
382                    Some(crate::expressions::IntervalUnit::Minute) => "MINUTE",
383                    Some(crate::expressions::IntervalUnit::Second) => "SECOND",
384                    Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
385                    Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
386                    Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
387                    None => "DAY",
388                };
389                let unit = Expression::Identifier(crate::expressions::Identifier {
390                    name: unit_str.to_string(),
391                    quoted: false,
392                    trailing_comments: Vec::new(),
393                    span: None,
394                });
395                Ok(Expression::Function(Box::new(Function::new(
396                    "DATE_DIFF".to_string(),
397                    vec![f.this, f.expression, unit],
398                ))))
399            }
400
401            // ===== Variance =====
402            // VarPop -> VAR_POP
403            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
404                "VAR_POP".to_string(),
405                vec![f.this],
406            )))),
407
408            // ===== Hash functions =====
409            // SHA -> SHA1
410            Expression::SHA(f) => Ok(Expression::Function(Box::new(Function::new(
411                "SHA1".to_string(),
412                vec![f.this],
413            )))),
414
415            // SHA1Digest -> SHA1
416            Expression::SHA1Digest(f) => Ok(Expression::Function(Box::new(Function::new(
417                "SHA1".to_string(),
418                vec![f.this],
419            )))),
420
421            // MD5Digest -> MD5
422            Expression::MD5Digest(f) => Ok(Expression::Function(Box::new(Function::new(
423                "MD5".to_string(),
424                vec![*f.this],
425            )))),
426
427            // ===== Type conversion =====
428            // JSONBool -> BOOL
429            Expression::JSONBool(f) => Ok(Expression::Function(Box::new(Function::new(
430                "BOOL".to_string(),
431                vec![f.this],
432            )))),
433
434            // StringFunc -> STRING
435            Expression::StringFunc(f) => Ok(Expression::Function(Box::new(Function::new(
436                "STRING".to_string(),
437                vec![*f.this],
438            )))),
439
440            // ===== Date/time from parts =====
441            // DateFromUnixDate -> DATE_FROM_UNIX_DATE
442            Expression::DateFromUnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
443                "DATE_FROM_UNIX_DATE".to_string(),
444                vec![f.this],
445            )))),
446
447            // UnixDate -> UNIX_DATE
448            Expression::UnixDate(f) => Ok(Expression::Function(Box::new(Function::new(
449                "UNIX_DATE".to_string(),
450                vec![f.this],
451            )))),
452
453            // TimestampDiff -> TIMESTAMP_DIFF
454            Expression::TimestampDiff(f) => Ok(Expression::Function(Box::new(Function::new(
455                "TIMESTAMP_DIFF".to_string(),
456                vec![*f.this, *f.expression],
457            )))),
458
459            // FromTimeZone -> DATETIME
460            Expression::FromTimeZone(f) => Ok(Expression::Function(Box::new(Function::new(
461                "DATETIME".to_string(),
462                vec![*f.this],
463            )))),
464
465            // TsOrDsToDatetime -> DATETIME
466            Expression::TsOrDsToDatetime(f) => Ok(Expression::Function(Box::new(Function::new(
467                "DATETIME".to_string(),
468                vec![f.this],
469            )))),
470
471            // TsOrDsToTimestamp -> TIMESTAMP
472            Expression::TsOrDsToTimestamp(f) => Ok(Expression::Function(Box::new(Function::new(
473                "TIMESTAMP".to_string(),
474                vec![f.this],
475            )))),
476
477            // ===== IfFunc -> IF in BigQuery =====
478            Expression::IfFunc(f) => {
479                let mut args = vec![f.condition, f.true_value];
480                if let Some(false_val) = f.false_value {
481                    args.push(false_val);
482                } else {
483                    args.push(Expression::Null(crate::expressions::Null));
484                }
485                Ok(Expression::Function(Box::new(Function::new(
486                    "IF".to_string(),
487                    args,
488                ))))
489            }
490
491            // ===== HexString -> FROM_HEX =====
492            Expression::HexStringExpr(f) => Ok(Expression::Function(Box::new(Function::new(
493                "FROM_HEX".to_string(),
494                vec![*f.this],
495            )))),
496
497            // ===== Additional auto-generated transforms from Python sqlglot =====
498            // ApproxTopK -> APPROX_TOP_COUNT
499            Expression::ApproxTopK(f) => {
500                let mut args = vec![*f.this];
501                if let Some(expr) = f.expression {
502                    args.push(*expr);
503                }
504                Ok(Expression::Function(Box::new(Function::new(
505                    "APPROX_TOP_COUNT".to_string(),
506                    args,
507                ))))
508            }
509
510            // SafeDivide -> SAFE_DIVIDE
511            Expression::SafeDivide(f) => Ok(Expression::Function(Box::new(Function::new(
512                "SAFE_DIVIDE".to_string(),
513                vec![*f.this, *f.expression],
514            )))),
515
516            // JSONKeysAtDepth -> JSON_KEYS
517            Expression::JSONKeysAtDepth(f) => Ok(Expression::Function(Box::new(Function::new(
518                "JSON_KEYS".to_string(),
519                vec![*f.this],
520            )))),
521
522            // JSONValueArray -> JSON_VALUE_ARRAY
523            Expression::JSONValueArray(f) => Ok(Expression::Function(Box::new(Function::new(
524                "JSON_VALUE_ARRAY".to_string(),
525                vec![*f.this],
526            )))),
527
528            // DateFromParts -> DATE
529            Expression::DateFromParts(f) => {
530                let mut args = Vec::new();
531                if let Some(y) = f.year {
532                    args.push(*y);
533                }
534                if let Some(m) = f.month {
535                    args.push(*m);
536                }
537                if let Some(d) = f.day {
538                    args.push(*d);
539                }
540                Ok(Expression::Function(Box::new(Function::new(
541                    "DATE".to_string(),
542                    args,
543                ))))
544            }
545
546            // SPLIT: BigQuery defaults to comma separator when none provided
547            // SPLIT(foo) -> SPLIT(foo, ',')
548            Expression::Split(f) => {
549                // Check if delimiter is empty or a placeholder - add default comma
550                let delimiter = match &f.delimiter {
551                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.is_empty()) =>
552                    {
553                        let Literal::String(_) = lit.as_ref() else {
554                            unreachable!()
555                        };
556                        Expression::Literal(Box::new(Literal::String(",".to_string())))
557                    }
558                    _ => f.delimiter,
559                };
560                Ok(Expression::Split(Box::new(SplitFunc {
561                    this: f.this,
562                    delimiter,
563                })))
564            }
565
566            // Cast: Transform the target type according to BigQuery TYPE_MAPPING
567            // Special case: CAST to JSON -> PARSE_JSON in BigQuery
568            // Special case: CAST(x AS temporal FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP
569            Expression::Cast(c) => {
570                use crate::expressions::DataType;
571                // Check if casting to JSON - use PARSE_JSON instead
572                let is_json = matches!(c.to, DataType::Json | DataType::JsonB)
573                    || matches!(&c.to, DataType::Custom { name } if name.eq_ignore_ascii_case("JSON") || name.eq_ignore_ascii_case("JSONB"));
574                if is_json {
575                    return Ok(Expression::ParseJson(Box::new(UnaryFunc::new(c.this))));
576                }
577                // CAST(x AS temporal_type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP(strftime_fmt, x)
578                if c.format.is_some() {
579                    let is_temporal = matches!(
580                        c.to,
581                        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
582                    ) || matches!(&c.to, DataType::Custom { name } if
583                        name.eq_ignore_ascii_case("TIMESTAMP") ||
584                        name.eq_ignore_ascii_case("DATE") ||
585                        name.eq_ignore_ascii_case("DATETIME") ||
586                        name.eq_ignore_ascii_case("TIME")
587                    );
588                    if is_temporal {
589                        let format_expr = c.format.as_ref().unwrap().as_ref();
590                        // Extract the actual format expr and timezone (if AT TIME ZONE is present)
591                        let (actual_format, timezone) = match format_expr {
592                            Expression::AtTimeZone(ref atz) => {
593                                (atz.this.clone(), Some(atz.zone.clone()))
594                            }
595                            _ => (format_expr.clone(), None),
596                        };
597                        let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format);
598                        let func_name = match &c.to {
599                            DataType::Date => "PARSE_DATE",
600                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATE") => {
601                                "PARSE_DATE"
602                            }
603                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME") => {
604                                "PARSE_DATETIME"
605                            }
606                            _ => "PARSE_TIMESTAMP",
607                        };
608                        let mut func_args = vec![strftime_fmt, c.this];
609                        if let Some(tz) = timezone {
610                            func_args.push(tz);
611                        }
612                        return Ok(Expression::Function(Box::new(Function::new(
613                            func_name.to_string(),
614                            func_args,
615                        ))));
616                    }
617                }
618                let transformed_type = match self.transform_data_type(c.to)? {
619                    Expression::DataType(dt) => dt,
620                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
621                };
622                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
623                    this: c.this,
624                    to: transformed_type,
625                    trailing_comments: c.trailing_comments,
626                    double_colon_syntax: c.double_colon_syntax,
627                    format: c.format,
628                    default: c.default,
629                    inferred_type: None,
630                })))
631            }
632
633            // SafeCast: Transform the target type according to BigQuery TYPE_MAPPING
634            Expression::SafeCast(c) => {
635                let transformed_type = match self.transform_data_type(c.to)? {
636                    Expression::DataType(dt) => dt,
637                    _ => return Err(crate::error::Error::parse("Expected DataType", 0, 0, 0, 0)),
638                };
639                Ok(Expression::SafeCast(Box::new(crate::expressions::Cast {
640                    this: c.this,
641                    to: transformed_type,
642                    trailing_comments: c.trailing_comments,
643                    double_colon_syntax: c.double_colon_syntax,
644                    format: c.format,
645                    default: c.default,
646                    inferred_type: None,
647                })))
648            }
649
650            // ===== SELECT-level transforms =====
651            // BigQuery: GROUP BY expression → alias when both GROUP BY and ORDER BY exist
652            Expression::Select(mut select) => {
653                if select.group_by.is_some() && select.order_by.is_some() {
654                    // Build map: expression → alias name for aliased projections
655                    let aliases: Vec<(Expression, Identifier)> = select
656                        .expressions
657                        .iter()
658                        .filter_map(|e| {
659                            if let Expression::Alias(a) = e {
660                                Some((a.this.clone(), a.alias.clone()))
661                            } else {
662                                None
663                            }
664                        })
665                        .collect();
666
667                    if let Some(ref mut group_by) = select.group_by {
668                        for grouped in group_by.expressions.iter_mut() {
669                            // Skip numeric indices (already aliased)
670                            if matches!(grouped, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
671                            {
672                                continue;
673                            }
674                            // Check if this GROUP BY expression matches a SELECT alias
675                            for (expr, alias_ident) in &aliases {
676                                if grouped == expr {
677                                    *grouped = Expression::boxed_column(Column {
678                                        name: alias_ident.clone(),
679                                        table: None,
680                                        join_mark: false,
681                                        trailing_comments: Vec::new(),
682                                        span: None,
683                                        inferred_type: None,
684                                    });
685                                    break;
686                                }
687                            }
688                        }
689                    }
690                }
691                Ok(Expression::Select(select))
692            }
693
694            // ===== ArrayContains → EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val) =====
695            Expression::ArrayContains(f) => {
696                let array_expr = f.this;
697                let value_expr = f.expression;
698
699                // Build: SELECT 1 FROM UNNEST(array) AS _col WHERE _col = value
700                let unnest = Expression::Unnest(Box::new(UnnestFunc {
701                    this: array_expr,
702                    expressions: Vec::new(),
703                    with_ordinality: false,
704                    alias: None,
705                    offset_alias: None,
706                }));
707                let aliased_unnest = Expression::Alias(Box::new(Alias {
708                    this: unnest,
709                    alias: Identifier::new("_col"),
710                    column_aliases: Vec::new(),
711                    alias_explicit_as: false,
712                    alias_keyword: None,
713                    pre_alias_comments: Vec::new(),
714                    trailing_comments: Vec::new(),
715                    inferred_type: None,
716                }));
717                let col_ref = Expression::boxed_column(Column {
718                    name: Identifier::new("_col"),
719                    table: None,
720                    join_mark: false,
721                    trailing_comments: Vec::new(),
722                    span: None,
723                    inferred_type: None,
724                });
725                let where_clause = Where {
726                    this: Expression::Eq(Box::new(BinaryOp {
727                        left: col_ref,
728                        right: value_expr,
729                        left_comments: Vec::new(),
730                        operator_comments: Vec::new(),
731                        trailing_comments: Vec::new(),
732                        inferred_type: None,
733                    })),
734                };
735                let inner_select = Expression::Select(Box::new(Select {
736                    expressions: vec![Expression::Literal(Box::new(Literal::Number(
737                        "1".to_string(),
738                    )))],
739                    from: Some(From {
740                        expressions: vec![aliased_unnest],
741                    }),
742                    where_clause: Some(where_clause),
743                    ..Default::default()
744                }));
745                Ok(Expression::Exists(Box::new(Exists {
746                    this: inner_select,
747                    not: false,
748                })))
749            }
750
751            // ===== JSON_OBJECT array form → key-value pairs =====
752            // BigQuery "signature 2": JSON_OBJECT(['a', 'b'], [10, NULL]) → JSON_OBJECT('a', 10, 'b', NULL)
753            Expression::JsonObject(mut f) => {
754                if f.pairs.len() == 1 {
755                    // Extract expressions from both Array and ArrayFunc variants
756                    let keys_exprs = match &f.pairs[0].0 {
757                        Expression::Array(arr) => Some(&arr.expressions),
758                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
759                        _ => None,
760                    };
761                    let vals_exprs = match &f.pairs[0].1 {
762                        Expression::Array(arr) => Some(&arr.expressions),
763                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
764                        _ => None,
765                    };
766                    if let (Some(keys), Some(vals)) = (keys_exprs, vals_exprs) {
767                        if keys.len() == vals.len() {
768                            let new_pairs: Vec<(Expression, Expression)> = keys
769                                .iter()
770                                .zip(vals.iter())
771                                .map(|(k, v)| (k.clone(), v.clone()))
772                                .collect();
773                            f.pairs = new_pairs;
774                        }
775                    }
776                }
777                Ok(Expression::JsonObject(f))
778            }
779
780            // ===== MOD function: unwrap unnecessary Paren from first argument =====
781            // BigQuery normalizes MOD((a + 1), b) -> MOD(a + 1, b)
782            Expression::ModFunc(mut f) => {
783                // Unwrap Paren from first argument if present
784                if let Expression::Paren(paren) = f.this {
785                    f.this = paren.this;
786                }
787                Ok(Expression::ModFunc(f))
788            }
789
790            // JSONExtract with variant_extract (Snowflake colon syntax) -> JSON_EXTRACT
791            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
792                let path = match *e.expression {
793                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
794                        let Literal::String(s) = lit.as_ref() else {
795                            unreachable!()
796                        };
797                        let normalized = if s.starts_with('$') {
798                            s.clone()
799                        } else if s.starts_with('[') {
800                            format!("${}", s)
801                        } else {
802                            format!("$.{}", s)
803                        };
804                        Expression::Literal(Box::new(Literal::String(normalized)))
805                    }
806                    other => other,
807                };
808                Ok(Expression::Function(Box::new(Function::new(
809                    "JSON_EXTRACT".to_string(),
810                    vec![*e.this, path],
811                ))))
812            }
813
814            // Generic function transformations
815            Expression::Function(f) => self.transform_function(*f),
816
817            // Generic aggregate function transformations
818            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
819
820            // MethodCall: Handle SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP
821            // These are parsed as MethodCall(this=SAFE, method=PARSE_DATE, args=[...])
822            Expression::MethodCall(mc) => self.transform_method_call(*mc),
823
824            // CreateFunction: Convert RETURNS TABLE(...) to RETURNS TABLE <...> for BigQuery
825            // and set is_table_function = true for TABLE FUNCTION syntax
826            Expression::CreateFunction(mut cf) => {
827                if let Some(ref mut rtb) = cf.returns_table_body {
828                    if rtb.starts_with("TABLE (") || rtb.starts_with("TABLE(") {
829                        // Convert TABLE (...) to TABLE <...> with BigQuery types
830                        let inner = if rtb.starts_with("TABLE (") {
831                            &rtb["TABLE (".len()..rtb.len() - 1]
832                        } else {
833                            &rtb["TABLE(".len()..rtb.len() - 1]
834                        };
835                        // Convert common types to BigQuery equivalents
836                        let converted = inner
837                            .replace(" INT,", " INT64,")
838                            .replace(" INT)", " INT64)")
839                            .replace(" INTEGER,", " INT64,")
840                            .replace(" INTEGER)", " INT64)")
841                            .replace(" FLOAT,", " FLOAT64,")
842                            .replace(" FLOAT)", " FLOAT64)")
843                            .replace(" BOOLEAN,", " BOOL,")
844                            .replace(" BOOLEAN)", " BOOL)")
845                            .replace(" VARCHAR", " STRING")
846                            .replace(" TEXT", " STRING");
847                        // Handle trailing type (no comma, no paren)
848                        let converted = if converted.ends_with(" INT") {
849                            format!("{}{}", &converted[..converted.len() - 4], " INT64")
850                        } else {
851                            converted
852                        };
853                        *rtb = format!("TABLE <{}>", converted);
854                        cf.is_table_function = true;
855                    }
856                }
857                // Convert string literal body to expression body for BigQuery TABLE FUNCTIONs only
858                if cf.is_table_function {
859                    if let Some(ref body) = cf.body {
860                        if matches!(body, FunctionBody::StringLiteral(_)) {
861                            if let Some(FunctionBody::StringLiteral(sql)) = cf.body.take() {
862                                // Parse the SQL string into an expression
863                                if let Ok(parsed) = crate::parser::Parser::parse_sql(&sql) {
864                                    if let Some(stmt) = parsed.into_iter().next() {
865                                        cf.body = Some(FunctionBody::Expression(stmt));
866                                    } else {
867                                        cf.body = Some(FunctionBody::StringLiteral(sql));
868                                    }
869                                } else {
870                                    cf.body = Some(FunctionBody::StringLiteral(sql));
871                                }
872                            }
873                        }
874                    }
875                }
876                Ok(Expression::CreateFunction(cf))
877            }
878
879            // Pass through everything else
880            _ => Ok(expr),
881        }
882    }
883}
884
885#[cfg(feature = "transpile")]
886impl BigQueryDialect {
887    /// Transform data types according to BigQuery TYPE_MAPPING
888    fn transform_data_type(&self, dt: crate::expressions::DataType) -> Result<Expression> {
889        use crate::expressions::DataType;
890        let transformed = match dt {
891            // BIGINT -> INT64
892            DataType::BigInt { .. } => DataType::Custom {
893                name: "INT64".to_string(),
894            },
895            // INT -> INT64
896            DataType::Int { .. } => DataType::Custom {
897                name: "INT64".to_string(),
898            },
899            // SMALLINT -> INT64
900            DataType::SmallInt { .. } => DataType::Custom {
901                name: "INT64".to_string(),
902            },
903            // TINYINT -> INT64
904            DataType::TinyInt { .. } => DataType::Custom {
905                name: "INT64".to_string(),
906            },
907            // FLOAT -> FLOAT64
908            DataType::Float { .. } => DataType::Custom {
909                name: "FLOAT64".to_string(),
910            },
911            // DOUBLE -> FLOAT64
912            DataType::Double { .. } => DataType::Custom {
913                name: "FLOAT64".to_string(),
914            },
915            // BOOLEAN -> BOOL
916            DataType::Boolean => DataType::Custom {
917                name: "BOOL".to_string(),
918            },
919            // CHAR -> STRING
920            DataType::Char { .. } => DataType::Custom {
921                name: "STRING".to_string(),
922            },
923            // VARCHAR -> STRING
924            DataType::VarChar { .. } => DataType::Custom {
925                name: "STRING".to_string(),
926            },
927            // TEXT -> STRING
928            DataType::Text => DataType::Custom {
929                name: "STRING".to_string(),
930            },
931            // STRING(n) -> STRING (BigQuery doesn't support length for STRING)
932            DataType::String { .. } => DataType::Custom {
933                name: "STRING".to_string(),
934            },
935            // BINARY -> BYTES
936            DataType::Binary { .. } => DataType::Custom {
937                name: "BYTES".to_string(),
938            },
939            // VARBINARY -> BYTES
940            DataType::VarBinary { .. } => DataType::Custom {
941                name: "BYTES".to_string(),
942            },
943            // BLOB -> BYTES
944            DataType::Blob => DataType::Custom {
945                name: "BYTES".to_string(),
946            },
947            // DECIMAL -> NUMERIC (BigQuery strips precision in CAST context)
948            DataType::Decimal { .. } => DataType::Custom {
949                name: "NUMERIC".to_string(),
950            },
951            // For BigQuery identity: preserve TIMESTAMP/DATETIME as Custom types
952            // This avoids the issue where parsed TIMESTAMP (timezone: false) would
953            // be converted to DATETIME by the generator
954            DataType::Timestamp {
955                timezone: false, ..
956            } => DataType::Custom {
957                name: "TIMESTAMP".to_string(),
958            },
959            DataType::Timestamp { timezone: true, .. } => DataType::Custom {
960                name: "TIMESTAMP".to_string(),
961            },
962            // UUID -> STRING (BigQuery doesn't have native UUID type)
963            DataType::Uuid => DataType::Custom {
964                name: "STRING".to_string(),
965            },
966            // RECORD -> STRUCT in BigQuery
967            DataType::Custom { ref name } if name.eq_ignore_ascii_case("RECORD") => {
968                DataType::Custom {
969                    name: "STRUCT".to_string(),
970                }
971            }
972            // TIMESTAMPTZ (custom) -> TIMESTAMP
973            DataType::Custom { ref name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") => {
974                DataType::Custom {
975                    name: "TIMESTAMP".to_string(),
976                }
977            }
978            // BYTEINT (custom) -> INT64
979            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTEINT") => {
980                DataType::Custom {
981                    name: "INT64".to_string(),
982                }
983            }
984            // Keep all other types as-is
985            other => other,
986        };
987        Ok(Expression::DataType(transformed))
988    }
989
990    fn transform_function(&self, f: Function) -> Result<Expression> {
991        let name_upper = f.name.to_uppercase();
992        match name_upper.as_str() {
993            // IFNULL -> COALESCE (both work in BigQuery)
994            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
995                original_name: None,
996                expressions: f.args,
997                inferred_type: None,
998            }))),
999
1000            // NVL -> COALESCE
1001            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1002                original_name: None,
1003                expressions: f.args,
1004                inferred_type: None,
1005            }))),
1006
1007            // ISNULL -> COALESCE
1008            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1009                original_name: None,
1010                expressions: f.args,
1011                inferred_type: None,
1012            }))),
1013
1014            // GROUP_CONCAT -> STRING_AGG in BigQuery
1015            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1016                Function::new("STRING_AGG".to_string(), f.args),
1017            ))),
1018
1019            // SUBSTR -> SUBSTRING (both work)
1020            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1021                "SUBSTRING".to_string(),
1022                f.args,
1023            )))),
1024
1025            // RANDOM -> RAND
1026            "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
1027                seed: None,
1028                lower: None,
1029                upper: None,
1030            }))),
1031
1032            // CURRENT_DATE -> CURRENT_DATE() in BigQuery
1033            // Keep as Function when it has args (e.g., CURRENT_DATE('UTC'))
1034            "CURRENT_DATE" if f.args.is_empty() => {
1035                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
1036            }
1037            "CURRENT_DATE" => Ok(Expression::Function(Box::new(Function {
1038                name: "CURRENT_DATE".to_string(),
1039                args: f.args,
1040                distinct: false,
1041                trailing_comments: Vec::new(),
1042                use_bracket_syntax: false,
1043                no_parens: false,
1044                quoted: false,
1045                span: None,
1046                inferred_type: None,
1047            }))),
1048
1049            // NOW -> CURRENT_TIMESTAMP in BigQuery
1050            "NOW" => Ok(Expression::CurrentTimestamp(
1051                crate::expressions::CurrentTimestamp {
1052                    precision: None,
1053                    sysdate: false,
1054                },
1055            )),
1056
1057            // TO_DATE -> PARSE_DATE in BigQuery
1058            "TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
1059                "PARSE_DATE".to_string(),
1060                f.args,
1061            )))),
1062
1063            // TO_TIMESTAMP -> PARSE_TIMESTAMP in BigQuery
1064            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1065                "PARSE_TIMESTAMP".to_string(),
1066                f.args,
1067            )))),
1068
1069            // TO_TIME -> TIME in BigQuery
1070            "TO_TIME" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1071                "TIME".to_string(),
1072                f.args,
1073            )))),
1074
1075            // DATE_FORMAT -> FORMAT_DATE in BigQuery (argument order may differ)
1076            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
1077                "FORMAT_DATE".to_string(),
1078                f.args,
1079            )))),
1080
1081            // POSITION -> STRPOS in BigQuery
1082            // BigQuery uses STRPOS(string, substring)
1083            "POSITION" if f.args.len() == 2 => {
1084                let mut args = f.args;
1085                // Swap arguments: POSITION(sub IN str) -> STRPOS(str, sub)
1086                let first = args.remove(0);
1087                let second = args.remove(0);
1088                Ok(Expression::Function(Box::new(Function::new(
1089                    "STRPOS".to_string(),
1090                    vec![second, first],
1091                ))))
1092            }
1093
1094            // LEN -> LENGTH
1095            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
1096                f.args.into_iter().next().unwrap(),
1097            )))),
1098
1099            // CEILING -> CEIL (both work)
1100            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1101                this: f.args.into_iter().next().unwrap(),
1102                decimals: None,
1103                to: None,
1104            }))),
1105
1106            // GETDATE -> CURRENT_TIMESTAMP
1107            "GETDATE" => Ok(Expression::CurrentTimestamp(
1108                crate::expressions::CurrentTimestamp {
1109                    precision: None,
1110                    sysdate: false,
1111                },
1112            )),
1113
1114            // ARRAY_LENGTH -> ARRAY_LENGTH (native)
1115            // CARDINALITY -> ARRAY_LENGTH
1116            "CARDINALITY" if f.args.len() == 1 => Ok(Expression::ArrayLength(Box::new(
1117                UnaryFunc::new(f.args.into_iter().next().unwrap()),
1118            ))),
1119
1120            // UNNEST is native to BigQuery
1121
1122            // GENERATE_SERIES -> GENERATE_ARRAY in BigQuery
1123            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
1124                "GENERATE_ARRAY".to_string(),
1125                f.args,
1126            )))),
1127
1128            // APPROX_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT (native)
1129            // APPROX_DISTINCT -> APPROX_COUNT_DISTINCT
1130            "APPROX_DISTINCT" => Ok(Expression::Function(Box::new(Function::new(
1131                "APPROX_COUNT_DISTINCT".to_string(),
1132                f.args,
1133            )))),
1134
1135            // COUNT_IF -> COUNTIF in BigQuery
1136            "COUNT_IF" => Ok(Expression::Function(Box::new(Function::new(
1137                "COUNTIF".to_string(),
1138                f.args,
1139            )))),
1140
1141            // SHA1 -> SHA1 (native), SHA -> SHA1
1142            "SHA" => Ok(Expression::Function(Box::new(Function::new(
1143                "SHA1".to_string(),
1144                f.args,
1145            )))),
1146
1147            // SHA256/SHA2 -> SHA256
1148            "SHA2" => Ok(Expression::Function(Box::new(Function::new(
1149                "SHA256".to_string(),
1150                f.args,
1151            )))),
1152
1153            // MD5 in BigQuery returns bytes, often combined with TO_HEX
1154            // TO_HEX(MD5(x)) pattern
1155            "MD5" => Ok(Expression::Function(Box::new(Function::new(
1156                "MD5".to_string(),
1157                f.args,
1158            )))),
1159
1160            // VARIANCE/VAR_SAMP -> VAR_SAMP (native)
1161            // VAR_POP -> VAR_POP (native)
1162
1163            // DATEADD(unit, amount, date) → DATE_ADD(date, INTERVAL amount unit) for BigQuery
1164            "DATEADD" if f.args.len() == 3 => {
1165                let mut args = f.args;
1166                let unit_expr = args.remove(0);
1167                let amount = args.remove(0);
1168                let date = args.remove(0);
1169                // Convert unit identifier to IntervalUnit
1170                let unit_name = match &unit_expr {
1171                    Expression::Identifier(id) => id.name.to_uppercase(),
1172                    Expression::Var(v) => v.this.to_uppercase(),
1173                    Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1174                    _ => "DAY".to_string(),
1175                };
1176                let unit = match unit_name.as_str() {
1177                    "YEAR" | "YEARS" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
1178                    "QUARTER" | "QUARTERS" | "QQ" | "Q" => {
1179                        crate::expressions::IntervalUnit::Quarter
1180                    }
1181                    "MONTH" | "MONTHS" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
1182                    "WEEK" | "WEEKS" | "WK" | "WW" => crate::expressions::IntervalUnit::Week,
1183                    "DAY" | "DAYS" | "DD" | "D" | "DAYOFMONTH" => {
1184                        crate::expressions::IntervalUnit::Day
1185                    }
1186                    "HOUR" | "HOURS" | "HH" => crate::expressions::IntervalUnit::Hour,
1187                    "MINUTE" | "MINUTES" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
1188                    "SECOND" | "SECONDS" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
1189                    "MILLISECOND" | "MILLISECONDS" | "MS" => {
1190                        crate::expressions::IntervalUnit::Millisecond
1191                    }
1192                    "MICROSECOND" | "MICROSECONDS" | "US" => {
1193                        crate::expressions::IntervalUnit::Microsecond
1194                    }
1195                    _ => crate::expressions::IntervalUnit::Day,
1196                };
1197                Ok(Expression::DateAdd(Box::new(
1198                    crate::expressions::DateAddFunc {
1199                        this: date,
1200                        interval: amount,
1201                        unit,
1202                    },
1203                )))
1204            }
1205            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
1206                "DATE_ADD".to_string(),
1207                f.args,
1208            )))),
1209
1210            // DATE_DIFF in BigQuery (native)
1211            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
1212                "DATE_DIFF".to_string(),
1213                f.args,
1214            )))),
1215
1216            // TIMESTAMP_DIFF in BigQuery
1217            "TIMESTAMPDIFF" => Ok(Expression::Function(Box::new(Function::new(
1218                "TIMESTAMP_DIFF".to_string(),
1219                f.args,
1220            )))),
1221
1222            // TIME -> TIME (native)
1223            // DATETIME -> DATETIME (native)
1224
1225            // SAFE_DIVIDE -> SAFE_DIVIDE (native)
1226
1227            // NEWID/UUID -> GENERATE_UUID
1228            "NEWID" | "UUID" => Ok(Expression::Function(Box::new(Function::new(
1229                "GENERATE_UUID".to_string(),
1230                vec![],
1231            )))),
1232
1233            // LEVENSHTEIN -> EDIT_DISTANCE (BigQuery naming)
1234            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
1235                "EDIT_DISTANCE".to_string(),
1236                f.args,
1237            )))),
1238
1239            // UNIX_TIMESTAMP -> UNIX_SECONDS
1240            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
1241                "UNIX_SECONDS".to_string(),
1242                f.args,
1243            )))),
1244
1245            // FROM_UNIXTIME -> TIMESTAMP_SECONDS
1246            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1247                "TIMESTAMP_SECONDS".to_string(),
1248                f.args,
1249            )))),
1250
1251            // CHAR_LENGTH / CHARACTER_LENGTH -> LENGTH
1252            "CHAR_LENGTH" | "CHARACTER_LENGTH" => Ok(Expression::Function(Box::new(
1253                Function::new("LENGTH".to_string(), f.args),
1254            ))),
1255
1256            // OCTET_LENGTH -> BYTE_LENGTH in BigQuery
1257            "OCTET_LENGTH" => Ok(Expression::Function(Box::new(Function::new(
1258                "BYTE_LENGTH".to_string(),
1259                f.args,
1260            )))),
1261
1262            // JSON_EXTRACT_STRING_ARRAY -> JSON_VALUE_ARRAY in BigQuery
1263            "JSON_EXTRACT_STRING_ARRAY" => Ok(Expression::Function(Box::new(Function::new(
1264                "JSON_VALUE_ARRAY".to_string(),
1265                f.args,
1266            )))),
1267
1268            // INSTR is native to BigQuery
1269
1270            // SPLIT: BigQuery defaults to comma separator when none provided
1271            // SPLIT(foo) -> SPLIT(foo, ',')
1272            "SPLIT" if f.args.len() == 1 => {
1273                let mut args = f.args;
1274                args.push(Expression::Literal(Box::new(Literal::String(
1275                    ",".to_string(),
1276                ))));
1277                Ok(Expression::Split(Box::new(SplitFunc {
1278                    this: args.remove(0),
1279                    delimiter: args.remove(0),
1280                })))
1281            }
1282
1283            // SPLIT with two args - convert to Split expression
1284            "SPLIT" if f.args.len() == 2 => {
1285                let mut args = f.args;
1286                Ok(Expression::Split(Box::new(SplitFunc {
1287                    this: args.remove(0),
1288                    delimiter: args.remove(0),
1289                })))
1290            }
1291
1292            // REGEXP_SUBSTR -> REGEXP_EXTRACT in BigQuery (strip extra Snowflake args)
1293            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
1294                // BigQuery REGEXP_EXTRACT supports (subject, pattern, pos, occ) max 4 args
1295                let args = if f.args.len() > 4 {
1296                    f.args[..4].to_vec()
1297                } else {
1298                    f.args
1299                };
1300                Ok(Expression::Function(Box::new(Function::new(
1301                    "REGEXP_EXTRACT".to_string(),
1302                    args,
1303                ))))
1304            }
1305            "REGEXP_SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1306                "REGEXP_EXTRACT".to_string(),
1307                f.args,
1308            )))),
1309
1310            // REGEXP_REPLACE - strip extra Snowflake-specific args
1311            "REGEXP_REPLACE" if f.args.len() > 3 => {
1312                let args = f.args[..3].to_vec();
1313                Ok(Expression::Function(Box::new(Function::new(
1314                    "REGEXP_REPLACE".to_string(),
1315                    args,
1316                ))))
1317            }
1318
1319            // OBJECT_CONSTRUCT_KEEP_NULL -> JSON_OBJECT
1320            "OBJECT_CONSTRUCT_KEEP_NULL" => Ok(Expression::Function(Box::new(Function::new(
1321                "JSON_OBJECT".to_string(),
1322                f.args,
1323            )))),
1324
1325            // EDITDISTANCE -> EDIT_DISTANCE with named max_distance parameter
1326            "EDITDISTANCE" if f.args.len() == 3 => {
1327                let col1 = f.args[0].clone();
1328                let col2 = f.args[1].clone();
1329                let max_dist = f.args[2].clone();
1330                Ok(Expression::Function(Box::new(Function::new(
1331                    "EDIT_DISTANCE".to_string(),
1332                    vec![
1333                        col1,
1334                        col2,
1335                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
1336                            name: crate::expressions::Identifier::new("max_distance".to_string()),
1337                            value: max_dist,
1338                            separator: crate::expressions::NamedArgSeparator::DArrow,
1339                        })),
1340                    ],
1341                ))))
1342            }
1343            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1344                Function::new("EDIT_DISTANCE".to_string(), f.args),
1345            ))),
1346
1347            // HEX_DECODE_BINARY -> FROM_HEX
1348            "HEX_DECODE_BINARY" => Ok(Expression::Function(Box::new(Function::new(
1349                "FROM_HEX".to_string(),
1350                f.args,
1351            )))),
1352
1353            // BigQuery format string normalization for PARSE_DATE/DATETIME/TIMESTAMP functions
1354            // %Y-%m-%d -> %F and %H:%M:%S -> %T
1355            "PARSE_DATE"
1356            | "PARSE_DATETIME"
1357            | "PARSE_TIMESTAMP"
1358            | "SAFE.PARSE_DATE"
1359            | "SAFE.PARSE_DATETIME"
1360            | "SAFE.PARSE_TIMESTAMP" => {
1361                let args = self.normalize_time_format_args(f.args);
1362                Ok(Expression::Function(Box::new(Function {
1363                    name: f.name,
1364                    args,
1365                    distinct: f.distinct,
1366                    no_parens: f.no_parens,
1367                    trailing_comments: f.trailing_comments,
1368                    quoted: f.quoted,
1369                    use_bracket_syntax: f.use_bracket_syntax,
1370                    span: None,
1371                    inferred_type: None,
1372                })))
1373            }
1374
1375            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in BigQuery
1376            "GET_PATH" if f.args.len() == 2 => {
1377                let mut args = f.args;
1378                let this = args.remove(0);
1379                let path = args.remove(0);
1380                let json_path = match &path {
1381                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
1382                        let Literal::String(s) = lit.as_ref() else {
1383                            unreachable!()
1384                        };
1385                        let normalized = if s.starts_with('$') {
1386                            s.clone()
1387                        } else if s.starts_with('[') {
1388                            format!("${}", s)
1389                        } else {
1390                            format!("$.{}", s)
1391                        };
1392                        Expression::Literal(Box::new(Literal::String(normalized)))
1393                    }
1394                    _ => path,
1395                };
1396                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
1397                    this,
1398                    path: json_path,
1399                    returning: None,
1400                    arrow_syntax: false,
1401                    hash_arrow_syntax: false,
1402                    wrapper_option: None,
1403                    quotes_option: None,
1404                    on_scalar_string: false,
1405                    on_error: None,
1406                })))
1407            }
1408
1409            // Pass through everything else
1410            _ => Ok(Expression::Function(Box::new(f))),
1411        }
1412    }
1413
1414    fn transform_aggregate_function(
1415        &self,
1416        f: Box<crate::expressions::AggregateFunction>,
1417    ) -> Result<Expression> {
1418        let name_upper = f.name.to_uppercase();
1419        match name_upper.as_str() {
1420            // GROUP_CONCAT -> STRING_AGG
1421            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1422                Function::new("STRING_AGG".to_string(), f.args),
1423            ))),
1424
1425            // Pass through everything else
1426            _ => Ok(Expression::AggregateFunction(f)),
1427        }
1428    }
1429
1430    /// Transform MethodCall expressions
1431    /// Handles SAFE.PARSE_DATE, SAFE.PARSE_DATETIME, SAFE.PARSE_TIMESTAMP format normalization
1432    fn transform_method_call(&self, mc: crate::expressions::MethodCall) -> Result<Expression> {
1433        use crate::expressions::MethodCall;
1434
1435        // Check if this is SAFE.PARSE_DATE/DATETIME/TIMESTAMP
1436        if let Expression::Column(ref col) = mc.this {
1437            if col.name.name.eq_ignore_ascii_case("SAFE") {
1438                let method_upper = mc.method.name.to_uppercase();
1439                if method_upper == "PARSE_DATE"
1440                    || method_upper == "PARSE_DATETIME"
1441                    || method_upper == "PARSE_TIMESTAMP"
1442                {
1443                    // Normalize the format string in the first argument
1444                    let args = self.normalize_time_format_args(mc.args);
1445                    return Ok(Expression::MethodCall(Box::new(MethodCall {
1446                        this: mc.this,
1447                        method: mc.method,
1448                        args,
1449                    })));
1450                }
1451            }
1452        }
1453
1454        // Pass through all other method calls
1455        Ok(Expression::MethodCall(Box::new(mc)))
1456    }
1457
1458    /// Normalize time format strings in function arguments
1459    /// BigQuery normalizes: %Y-%m-%d -> %F, %H:%M:%S -> %T
1460    fn normalize_time_format_args(&self, args: Vec<Expression>) -> Vec<Expression> {
1461        args.into_iter()
1462            .enumerate()
1463            .map(|(i, arg)| {
1464                // Only transform the first argument (the format string)
1465                if i == 0 {
1466                    if let Expression::Literal(ref lit) = arg {
1467                        if let Literal::String(s) = lit.as_ref() {
1468                            let normalized = self.normalize_time_format(&s);
1469                            return Expression::Literal(Box::new(Literal::String(normalized)));
1470                        }
1471                    }
1472                }
1473                arg
1474            })
1475            .collect()
1476    }
1477
1478    /// Normalize a time format string according to BigQuery conventions
1479    /// %Y-%m-%d -> %F (ISO date)
1480    /// %H:%M:%S -> %T (time)
1481    fn normalize_time_format(&self, format: &str) -> String {
1482        format.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
1483    }
1484
1485    /// Convert BigQuery CAST FORMAT elements to strftime equivalents,
1486    /// then normalize BigQuery shorthand forms (%Y-%m-%d -> %F, %H:%M:%S -> %T)
1487    fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
1488        use crate::expressions::Literal;
1489        if let Expression::Literal(lit) = format_expr {
1490            if let Literal::String(s) = lit.as_ref() {
1491                let result = s
1492                    .replace("YYYYMMDD", "%Y%m%d")
1493                    .replace("YYYY", "%Y")
1494                    .replace("YY", "%y")
1495                    .replace("MONTH", "%B")
1496                    .replace("MON", "%b")
1497                    .replace("MM", "%m")
1498                    .replace("DD", "%d")
1499                    .replace("HH24", "%H")
1500                    .replace("HH12", "%I")
1501                    .replace("HH", "%I")
1502                    .replace("MI", "%M")
1503                    .replace("SSTZH", "%S%z")
1504                    .replace("SS", "%S")
1505                    .replace("TZH", "%z");
1506                // Normalize: %Y-%m-%d -> %F, %H:%M:%S -> %T
1507                let normalized = result.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T");
1508                return Expression::Literal(Box::new(Literal::String(normalized)));
1509            }
1510        }
1511        format_expr.clone()
1512    }
1513}
1514
1515#[cfg(test)]
1516mod tests {
1517    use super::*;
1518    use crate::dialects::Dialect;
1519    use crate::parse_one;
1520
1521    fn transpile_to_bigquery(sql: &str) -> String {
1522        let dialect = Dialect::get(DialectType::Generic);
1523        let result = dialect
1524            .transpile(sql, DialectType::BigQuery)
1525            .expect("Transpile failed");
1526        result[0].clone()
1527    }
1528
1529    #[test]
1530    fn test_ifnull_identity() {
1531        // Generic -> BigQuery: IFNULL is normalized to COALESCE (matching sqlglot behavior)
1532        let result = transpile_to_bigquery("SELECT IFNULL(a, b)");
1533        assert!(
1534            result.contains("COALESCE"),
1535            "Expected COALESCE, got: {}",
1536            result
1537        );
1538    }
1539
1540    #[test]
1541    fn test_nvl_to_ifnull() {
1542        // NVL is converted to IFNULL in BigQuery
1543        let result = transpile_to_bigquery("SELECT NVL(a, b)");
1544        assert!(
1545            result.contains("IFNULL"),
1546            "Expected IFNULL, got: {}",
1547            result
1548        );
1549    }
1550
1551    #[test]
1552    fn test_try_cast_to_safe_cast() {
1553        let result = transpile_to_bigquery("SELECT TRY_CAST(a AS INT)");
1554        assert!(
1555            result.contains("SAFE_CAST"),
1556            "Expected SAFE_CAST, got: {}",
1557            result
1558        );
1559    }
1560
1561    #[test]
1562    fn test_random_to_rand() {
1563        let result = transpile_to_bigquery("SELECT RANDOM()");
1564        assert!(result.contains("RAND"), "Expected RAND, got: {}", result);
1565    }
1566
1567    #[test]
1568    fn test_basic_select() {
1569        let result = transpile_to_bigquery("SELECT a, b FROM users WHERE id = 1");
1570        assert!(result.contains("SELECT"));
1571        assert!(result.contains("FROM users"));
1572    }
1573
1574    #[test]
1575    fn test_group_concat_to_string_agg() {
1576        let result = transpile_to_bigquery("SELECT GROUP_CONCAT(name)");
1577        assert!(
1578            result.contains("STRING_AGG"),
1579            "Expected STRING_AGG, got: {}",
1580            result
1581        );
1582    }
1583
1584    #[test]
1585    fn test_generate_series_to_generate_array() {
1586        let result = transpile_to_bigquery("SELECT GENERATE_SERIES(1, 10)");
1587        assert!(
1588            result.contains("GENERATE_ARRAY"),
1589            "Expected GENERATE_ARRAY, got: {}",
1590            result
1591        );
1592    }
1593
1594    #[test]
1595    fn test_backtick_identifiers() {
1596        // BigQuery uses backticks for identifiers
1597        let dialect = BigQueryDialect;
1598        let config = dialect.generator_config();
1599        assert_eq!(config.identifier_quote, '`');
1600    }
1601
1602    fn bigquery_identity(sql: &str, expected: &str) {
1603        let dialect = Dialect::get(DialectType::BigQuery);
1604        let ast = dialect.parse(sql).expect("Parse failed");
1605        let transformed = dialect.transform(ast[0].clone()).expect("Transform failed");
1606        let result = dialect.generate(&transformed).expect("Generate failed");
1607        assert_eq!(result, expected, "SQL: {}", sql);
1608    }
1609
1610    #[test]
1611    fn test_safe_namespace_parses_as_function() {
1612        let expr = parse_one(
1613            "SELECT SAFE.PARSE_JSON(data) AS json_data FROM t",
1614            DialectType::BigQuery,
1615        )
1616        .expect("parse");
1617
1618        let Expression::Select(select) = expr else {
1619            panic!("expected SELECT");
1620        };
1621        let Expression::Alias(alias) = &select.expressions[0] else {
1622            panic!("expected alias");
1623        };
1624        let Expression::Function(function) = &alias.this else {
1625            panic!("expected SAFE namespace call to parse as Function");
1626        };
1627
1628        assert_eq!(function.name, "SAFE.PARSE_JSON");
1629        assert_eq!(function.args.len(), 1);
1630    }
1631
1632    #[test]
1633    fn test_safe_namespace_identity() {
1634        bigquery_identity("SAFE.PARSE_JSON(data)", "SAFE.PARSE_JSON(data)");
1635        bigquery_identity(
1636            "SAFE.PARSE_DATE('%Y-%m-%d', date_col)",
1637            "SAFE.PARSE_DATE('%F', date_col)",
1638        );
1639        bigquery_identity("SAFE.DIVIDE(a, b)", "SAFE.DIVIDE(a, b)");
1640    }
1641
1642    #[test]
1643    fn test_cast_char_to_string() {
1644        bigquery_identity("CAST(x AS CHAR)", "CAST(x AS STRING)");
1645    }
1646
1647    #[test]
1648    fn test_cast_varchar_to_string() {
1649        bigquery_identity("CAST(x AS VARCHAR)", "CAST(x AS STRING)");
1650    }
1651
1652    #[test]
1653    fn test_cast_nchar_to_string() {
1654        bigquery_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)");
1655    }
1656
1657    #[test]
1658    fn test_cast_nvarchar_to_string() {
1659        bigquery_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)");
1660    }
1661
1662    #[test]
1663    fn test_cast_timestamptz_to_timestamp() {
1664        bigquery_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)");
1665    }
1666
1667    #[test]
1668    fn test_cast_record_to_struct() {
1669        bigquery_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)");
1670    }
1671
1672    #[test]
1673    fn test_json_literal_to_parse_json() {
1674        // JSON 'string' literal syntax should be converted to PARSE_JSON()
1675        bigquery_identity(
1676            "SELECT JSON '\"foo\"' AS json_data",
1677            "SELECT PARSE_JSON('\"foo\"') AS json_data",
1678        );
1679    }
1680
1681    #[test]
1682    fn test_grant_as_alias_not_quoted() {
1683        // GRANT is not a reserved keyword in BigQuery, should not be backtick-quoted
1684        bigquery_identity(
1685            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1686            "SELECT GRANT FROM (SELECT 'input' AS GRANT)",
1687        );
1688    }
1689
1690    #[test]
1691    fn test_timestamp_literal_to_cast() {
1692        // TIMESTAMP 'value' literal should be converted to CAST('value' AS TIMESTAMP)
1693        bigquery_identity(
1694            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
1695            "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c"
1696        );
1697    }
1698
1699    #[test]
1700    fn test_date_literal_to_cast_in_extract() {
1701        // Issue 1: DATE literal should become CAST syntax in BigQuery
1702        bigquery_identity(
1703            "EXTRACT(WEEK(THURSDAY) FROM DATE '2013-12-25')",
1704            "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
1705        );
1706    }
1707
1708    #[test]
1709    fn test_json_object_with_json_literals() {
1710        // Issue 2: JSON literals in JSON_OBJECT should use PARSE_JSON, not CAST AS JSON
1711        bigquery_identity(
1712            "SELECT JSON_OBJECT('a', JSON '10') AS json_data",
1713            "SELECT JSON_OBJECT('a', PARSE_JSON('10')) AS json_data",
1714        );
1715    }
1716
1717    // NOTE: MOD paren unwrapping is tested in the conformance tests (sqlglot_dialect_identity).
1718    // The unit test version was removed due to stack overflow in debug builds (deep recursion).
1719    // Test case: MOD((a + 1), b) -> MOD(a + 1, b)
1720
1721    #[test]
1722    fn test_safe_parse_date_format_normalization() {
1723        // SAFE.PARSE_DATE format string normalization: %Y-%m-%d -> %F
1724        bigquery_identity(
1725            "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')",
1726            "SAFE.PARSE_DATE('%F', '2024-01-15')",
1727        );
1728    }
1729
1730    #[test]
1731    fn test_safe_parse_datetime_format_normalization() {
1732        // SAFE.PARSE_DATETIME format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1733        bigquery_identity(
1734            "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1735            "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')",
1736        );
1737    }
1738
1739    #[test]
1740    fn test_safe_parse_timestamp_format_normalization() {
1741        // SAFE.PARSE_TIMESTAMP format string normalization: %Y-%m-%d %H:%M:%S -> %F %T
1742        bigquery_identity(
1743            "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')",
1744            "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')",
1745        );
1746    }
1747
1748    #[test]
1749    fn test_datetime_literal_to_cast() {
1750        // DATETIME 'value' literal should be converted to CAST('value' AS DATETIME)
1751        bigquery_identity(
1752            "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
1753            "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
1754        );
1755    }
1756
1757    #[test]
1758    fn test_last_day_week_modifier_stripped() {
1759        // WEEK(SUNDAY) should become WEEK in BigQuery LAST_DAY function
1760        bigquery_identity("LAST_DAY(col, WEEK(MONDAY))", "LAST_DAY(col, WEEK)");
1761    }
1762
1763    #[test]
1764    fn test_hash_line_comment_parses() {
1765        // Regression test for issue #38:
1766        // BigQuery should accept # as a single-line comment.
1767        let result = parse_one("SELECT 1 as a #hello world", DialectType::BigQuery);
1768        assert!(result.is_ok(), "Expected parse to succeed, got: {result:?}");
1769    }
1770}