Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25#[cfg(feature = "generate")]
26use crate::generator::GeneratorConfig;
27use crate::tokens::TokenizerConfig;
28
29/// Helper to wrap JSON arrow expressions in parentheses when they appear
30/// in contexts that require it (Binary, In, Not expressions)
31/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
32fn wrap_if_json_arrow(expr: Expression) -> Expression {
33    match &expr {
34        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
35            this: expr,
36            trailing_comments: Vec::new(),
37        })),
38        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
39            this: expr,
40            trailing_comments: Vec::new(),
41        })),
42        _ => expr,
43    }
44}
45
46/// PostgreSQL dialect
47pub struct PostgresDialect;
48
49impl DialectImpl for PostgresDialect {
50    fn dialect_type(&self) -> DialectType {
51        DialectType::PostgreSQL
52    }
53
54    fn tokenizer_config(&self) -> TokenizerConfig {
55        use crate::tokens::TokenType;
56        let mut config = TokenizerConfig::default();
57        // PostgreSQL supports $$ string literals (heredoc strings)
58        config.quotes.insert("$$".to_string(), "$$".to_string());
59        // PostgreSQL uses double quotes for identifiers
60        config.identifiers.insert('"', '"');
61        // Nested comments supported
62        config.nested_comments = true;
63        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
64        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
65        config
66            .keywords
67            .insert("EXEC".to_string(), TokenType::Command);
68        config
69    }
70
71    #[cfg(feature = "generate")]
72
73    fn generator_config(&self) -> GeneratorConfig {
74        use crate::generator::IdentifierQuoteStyle;
75        GeneratorConfig {
76            identifier_quote: '"',
77            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
78            dialect: Some(DialectType::PostgreSQL),
79            // PostgreSQL uses TIMESTAMPTZ shorthand
80            tz_to_with_time_zone: false,
81            // PostgreSQL prefers INTERVAL '1 day' syntax
82            single_string_interval: true,
83            // TABLESAMPLE uses REPEATABLE in PostgreSQL
84            tablesample_seed_keyword: "REPEATABLE",
85            // PostgreSQL doesn't support NVL2
86            nvl2_supported: false,
87            // PostgreSQL uses $ for parameters
88            parameter_token: "$",
89            // PostgreSQL uses % for named placeholders
90            named_placeholder_token: "%",
91            // PostgreSQL supports SELECT INTO
92            supports_select_into: true,
93            // PostgreSQL: USING btree(col) without space before parens
94            index_using_no_space: true,
95            // PostgreSQL supports UNLOGGED tables
96            supports_unlogged_tables: true,
97            // PostgreSQL doesn't support multi-arg DISTINCT
98            multi_arg_distinct: false,
99            // PostgreSQL uses ANY (subquery) with space
100            quantified_no_paren_space: false,
101            // PostgreSQL supports window EXCLUDE clause
102            supports_window_exclude: true,
103            // PostgreSQL normalizes single-bound window frames to BETWEEN form
104            normalize_window_frame_between: true,
105            // PostgreSQL COPY doesn't use INTO keyword
106            copy_has_into_keyword: false,
107            // PostgreSQL ARRAY_SIZE requires dimension argument
108            array_size_dim_required: Some(true),
109            // PostgreSQL supports BETWEEN flags
110            supports_between_flags: true,
111            // PostgreSQL doesn't support hints
112            join_hints: false,
113            table_hints: false,
114            query_hints: false,
115            // PostgreSQL supports locking reads
116            locking_reads_supported: true,
117            // PostgreSQL doesn't rename tables with DB
118            rename_table_with_db: false,
119            // PostgreSQL can implement array any
120            can_implement_array_any: true,
121            // PostgreSQL ARRAY_CONCAT is not var-len
122            array_concat_is_var_len: false,
123            // PostgreSQL doesn't support MEDIAN
124            supports_median: false,
125            // PostgreSQL requires JSON type for extraction
126            json_type_required_for_extraction: true,
127            // PostgreSQL LIKE property inside schema
128            like_property_inside_schema: true,
129            ..Default::default()
130        }
131    }
132
133    #[cfg(feature = "transpile")]
134
135    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
136        match expr {
137            // ============================================
138            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
139            // These are handled specially - transform DataType variants
140            // ============================================
141            Expression::DataType(dt) => self.transform_data_type(dt),
142
143            // ============================================
144            // NULL HANDLING
145            // ============================================
146            // IFNULL -> COALESCE in PostgreSQL
147            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
148                original_name: None,
149                expressions: vec![f.this, f.expression],
150                inferred_type: None,
151            }))),
152
153            // NVL -> COALESCE in PostgreSQL
154            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
155                original_name: None,
156                expressions: vec![f.this, f.expression],
157                inferred_type: None,
158            }))),
159
160            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
161            // so it outputs as COALESCE instead of the original function name
162            Expression::Coalesce(mut f) => {
163                f.original_name = None;
164                Ok(Expression::Coalesce(f))
165            }
166
167            // ============================================
168            // CAST OPERATIONS
169            // ============================================
170            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
171            Expression::TryCast(c) => Ok(Expression::Cast(c)),
172
173            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
174            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
175
176            // ============================================
177            // RANDOM
178            // ============================================
179            // RAND -> RANDOM in PostgreSQL
180            Expression::Rand(r) => {
181                // PostgreSQL's RANDOM() doesn't take a seed argument
182                let _ = r.seed; // Ignore seed
183                Ok(Expression::Random(crate::expressions::Random))
184            }
185
186            // ============================================
187            // UUID
188            // ============================================
189            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
190            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
191                "GEN_RANDOM_UUID".to_string(),
192                vec![],
193            )))),
194
195            // ============================================
196            // ARRAY OPERATIONS
197            // ============================================
198            // EXPLODE -> UNNEST in PostgreSQL
199            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
200                crate::expressions::UnnestFunc {
201                    this: f.this,
202                    expressions: Vec::new(),
203                    with_ordinality: false,
204                    alias: None,
205                    offset_alias: None,
206                },
207            ))),
208
209            // ExplodeOuter -> UNNEST in PostgreSQL
210            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
211                crate::expressions::UnnestFunc {
212                    this: f.this,
213                    expressions: Vec::new(),
214                    with_ordinality: false,
215                    alias: None,
216                    offset_alias: None,
217                },
218            ))),
219
220            // ArrayConcat -> ARRAY_CAT in PostgreSQL
221            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
222                "ARRAY_CAT".to_string(),
223                f.expressions,
224            )))),
225
226            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
227            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
228                "ARRAY_PREPEND".to_string(),
229                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
230            )))),
231
232            // ============================================
233            // BITWISE OPERATIONS
234            // ============================================
235            // BitwiseXor -> # operator in PostgreSQL
236            Expression::BitwiseXor(f) => {
237                // Use a special marker that generator will recognize
238                Ok(Expression::Function(Box::new(Function::new(
239                    "__PG_BITWISE_XOR__".to_string(),
240                    vec![f.left, f.right],
241                ))))
242            }
243
244            // BitwiseAndAgg -> BIT_AND
245            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
246                "BIT_AND".to_string(),
247                vec![f.this],
248            )))),
249
250            // BitwiseOrAgg -> BIT_OR
251            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
252                "BIT_OR".to_string(),
253                vec![f.this],
254            )))),
255
256            // BitwiseXorAgg -> BIT_XOR
257            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
258                "BIT_XOR".to_string(),
259                vec![f.this],
260            )))),
261
262            // ============================================
263            // BOOLEAN AGGREGATES
264            // ============================================
265            // LogicalAnd -> BOOL_AND
266            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
267                "BOOL_AND".to_string(),
268                vec![f.this],
269            )))),
270
271            // LogicalOr -> BOOL_OR
272            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
273                "BOOL_OR".to_string(),
274                vec![f.this],
275            )))),
276
277            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
278            Expression::Xor(f) => {
279                if let (Some(a), Some(b)) = (f.this, f.expression) {
280                    Ok(Expression::Neq(Box::new(BinaryOp {
281                        left: *a,
282                        right: *b,
283                        left_comments: Vec::new(),
284                        operator_comments: Vec::new(),
285                        trailing_comments: Vec::new(),
286                        inferred_type: None,
287                    })))
288                } else {
289                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
290                }
291            }
292
293            // ============================================
294            // ARRAY OPERATORS
295            // ============================================
296            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
297            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
298            Expression::ArrayContainedBy(op) => {
299                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
300                    left: op.right,
301                    right: op.left,
302                    left_comments: Vec::new(),
303                    operator_comments: Vec::new(),
304                    trailing_comments: Vec::new(),
305                    inferred_type: None,
306                })))
307            }
308
309            // ============================================
310            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
311            // ============================================
312            // RegexpLike -> keep as-is, generator handles ~ operator output
313            Expression::RegexpLike(f) => {
314                // Generator will output as: expr ~ pattern
315                Ok(Expression::RegexpLike(f))
316            }
317
318            // ============================================
319            // DATE/TIME FUNCTIONS
320            // ============================================
321            // DateAdd -> date + INTERVAL in PostgreSQL
322            Expression::DateAdd(f) => {
323                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
324                let right_expr = if is_literal {
325                    // Literal value: INTERVAL 'value' unit
326                    Expression::Interval(Box::new(Interval {
327                        this: Some(f.interval),
328                        unit: Some(IntervalUnitSpec::Simple {
329                            unit: f.unit,
330                            use_plural: false,
331                        }),
332                    }))
333                } else {
334                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
335                    let unit_str = match f.unit {
336                        IntervalUnit::Year => "YEAR",
337                        IntervalUnit::Quarter => "QUARTER",
338                        IntervalUnit::Month => "MONTH",
339                        IntervalUnit::Week => "WEEK",
340                        IntervalUnit::Day => "DAY",
341                        IntervalUnit::Hour => "HOUR",
342                        IntervalUnit::Minute => "MINUTE",
343                        IntervalUnit::Second => "SECOND",
344                        IntervalUnit::Millisecond => "MILLISECOND",
345                        IntervalUnit::Microsecond => "MICROSECOND",
346                        IntervalUnit::Nanosecond => "NANOSECOND",
347                    };
348                    let interval_one = Expression::Interval(Box::new(Interval {
349                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
350                            "1 {unit_str}"
351                        ))))),
352                        unit: None,
353                    }));
354                    Expression::Mul(Box::new(BinaryOp {
355                        left: interval_one,
356                        right: f.interval,
357                        left_comments: Vec::new(),
358                        operator_comments: Vec::new(),
359                        trailing_comments: Vec::new(),
360                        inferred_type: None,
361                    }))
362                };
363                Ok(Expression::Add(Box::new(BinaryOp {
364                    left: f.this,
365                    right: right_expr,
366                    left_comments: Vec::new(),
367                    operator_comments: Vec::new(),
368                    trailing_comments: Vec::new(),
369                    inferred_type: None,
370                })))
371            }
372
373            // DateSub -> date - INTERVAL in PostgreSQL
374            Expression::DateSub(f) => {
375                let interval_expr = Expression::Interval(Box::new(Interval {
376                    this: Some(f.interval),
377                    unit: Some(IntervalUnitSpec::Simple {
378                        unit: f.unit,
379                        use_plural: false,
380                    }),
381                }));
382                Ok(Expression::Sub(Box::new(BinaryOp {
383                    left: f.this,
384                    right: interval_expr,
385                    left_comments: Vec::new(),
386                    operator_comments: Vec::new(),
387                    trailing_comments: Vec::new(),
388                    inferred_type: None,
389                })))
390            }
391
392            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
393            Expression::DateDiff(f) => {
394                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
395                // matching the 3-arg string-based DATEDIFF handler below
396                let unit = f.unit.unwrap_or(IntervalUnit::Day);
397
398                // Helper: CAST(expr AS TIMESTAMP)
399                let cast_ts = |e: Expression| -> Expression {
400                    Expression::Cast(Box::new(Cast {
401                        this: e,
402                        to: DataType::Timestamp {
403                            precision: None,
404                            timezone: false,
405                        },
406                        trailing_comments: Vec::new(),
407                        double_colon_syntax: false,
408                        format: None,
409                        default: None,
410                        inferred_type: None,
411                    }))
412                };
413
414                // Helper: CAST(expr AS BIGINT)
415                let cast_bigint = |e: Expression| -> Expression {
416                    Expression::Cast(Box::new(Cast {
417                        this: e,
418                        to: DataType::BigInt { length: None },
419                        trailing_comments: Vec::new(),
420                        double_colon_syntax: false,
421                        format: None,
422                        default: None,
423                        inferred_type: None,
424                    }))
425                };
426
427                // Clone end/start for reuse
428                let end_expr = f.this;
429                let start = f.expression;
430
431                // Helper: end_ts - start_ts
432                let ts_diff = || -> Expression {
433                    Expression::Sub(Box::new(BinaryOp::new(
434                        cast_ts(end_expr.clone()),
435                        cast_ts(start.clone()),
436                    )))
437                };
438
439                // Helper: AGE(end_ts, start_ts)
440                let age_call = || -> Expression {
441                    Expression::Function(Box::new(Function::new(
442                        "AGE".to_string(),
443                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
444                    )))
445                };
446
447                // Helper: EXTRACT(field FROM expr)
448                let extract = |field: DateTimeField, from: Expression| -> Expression {
449                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
450                };
451
452                // Helper: number literal
453                let num = |n: i64| -> Expression {
454                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
455                };
456
457                let epoch_field = DateTimeField::Custom("epoch".to_string());
458
459                let result = match unit {
460                    IntervalUnit::Nanosecond => {
461                        let epoch = extract(epoch_field.clone(), ts_diff());
462                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
463                            epoch,
464                            num(1000000000),
465                        ))))
466                    }
467                    IntervalUnit::Microsecond => {
468                        let epoch = extract(epoch_field, ts_diff());
469                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
470                            epoch,
471                            num(1000000),
472                        ))))
473                    }
474                    IntervalUnit::Millisecond => {
475                        let epoch = extract(epoch_field, ts_diff());
476                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
477                    }
478                    IntervalUnit::Second => {
479                        let epoch = extract(epoch_field, ts_diff());
480                        cast_bigint(epoch)
481                    }
482                    IntervalUnit::Minute => {
483                        let epoch = extract(epoch_field, ts_diff());
484                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
485                    }
486                    IntervalUnit::Hour => {
487                        let epoch = extract(epoch_field, ts_diff());
488                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
489                    }
490                    IntervalUnit::Day => {
491                        let epoch = extract(epoch_field, ts_diff());
492                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
493                    }
494                    IntervalUnit::Week => {
495                        let diff_parens = Expression::Paren(Box::new(Paren {
496                            this: ts_diff(),
497                            trailing_comments: Vec::new(),
498                        }));
499                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
500                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
501                    }
502                    IntervalUnit::Month => {
503                        let year_part =
504                            extract(DateTimeField::Custom("year".to_string()), age_call());
505                        let month_part =
506                            extract(DateTimeField::Custom("month".to_string()), age_call());
507                        let year_months =
508                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
509                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
510                            year_months,
511                            month_part,
512                        ))))
513                    }
514                    IntervalUnit::Quarter => {
515                        let year_part =
516                            extract(DateTimeField::Custom("year".to_string()), age_call());
517                        let month_part =
518                            extract(DateTimeField::Custom("month".to_string()), age_call());
519                        let year_quarters =
520                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
521                        let month_quarters =
522                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
523                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
524                            year_quarters,
525                            month_quarters,
526                        ))))
527                    }
528                    IntervalUnit::Year => cast_bigint(extract(
529                        DateTimeField::Custom("year".to_string()),
530                        age_call(),
531                    )),
532                };
533                Ok(result)
534            }
535
536            // UnixToTime -> TO_TIMESTAMP
537            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
538                "TO_TIMESTAMP".to_string(),
539                vec![*f.this],
540            )))),
541
542            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
543            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
544                "DATE_PART".to_string(),
545                vec![Expression::string("epoch"), f.this],
546            )))),
547
548            // StrToTime -> TO_TIMESTAMP in PostgreSQL
549            Expression::ToTimestamp(f) => {
550                let mut args = vec![f.this];
551                if let Some(fmt) = f.format {
552                    args.push(fmt);
553                }
554                Ok(Expression::Function(Box::new(Function::new(
555                    "TO_TIMESTAMP".to_string(),
556                    args,
557                ))))
558            }
559
560            // StrToDate -> TO_DATE in PostgreSQL
561            Expression::ToDate(f) => {
562                let mut args = vec![f.this];
563                if let Some(fmt) = f.format {
564                    args.push(fmt);
565                }
566                Ok(Expression::Function(Box::new(Function::new(
567                    "TO_DATE".to_string(),
568                    args,
569                ))))
570            }
571
572            // TimestampTrunc -> DATE_TRUNC
573            Expression::TimestampTrunc(f) => {
574                // Convert DateTimeField to string expression for DATE_TRUNC
575                let unit_str = format!("{:?}", f.unit).to_lowercase();
576                let args = vec![Expression::string(&unit_str), f.this];
577                Ok(Expression::Function(Box::new(Function::new(
578                    "DATE_TRUNC".to_string(),
579                    args,
580                ))))
581            }
582
583            // TimeFromParts -> MAKE_TIME
584            Expression::TimeFromParts(f) => {
585                let mut args = Vec::new();
586                if let Some(h) = f.hour {
587                    args.push(*h);
588                }
589                if let Some(m) = f.min {
590                    args.push(*m);
591                }
592                if let Some(s) = f.sec {
593                    args.push(*s);
594                }
595                Ok(Expression::Function(Box::new(Function::new(
596                    "MAKE_TIME".to_string(),
597                    args,
598                ))))
599            }
600
601            // TimestampFromParts -> MAKE_TIMESTAMP
602            Expression::MakeTimestamp(f) => {
603                // MakeTimestampFunc has direct Expression fields, not Options
604                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
605                Ok(Expression::Function(Box::new(Function::new(
606                    "MAKE_TIMESTAMP".to_string(),
607                    args,
608                ))))
609            }
610
611            // ============================================
612            // STRING FUNCTIONS
613            // ============================================
614            // StringAgg is native to PostgreSQL - keep as-is
615            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
616
617            // GroupConcat -> STRING_AGG in PostgreSQL
618            Expression::GroupConcat(f) => {
619                let mut args = vec![f.this.clone()];
620                if let Some(sep) = f.separator.clone() {
621                    args.push(sep);
622                } else {
623                    args.push(Expression::string(","));
624                }
625                Ok(Expression::Function(Box::new(Function::new(
626                    "STRING_AGG".to_string(),
627                    args,
628                ))))
629            }
630
631            // StrPosition -> POSITION function
632            Expression::Position(f) => {
633                // PostgreSQL: POSITION(substring IN string)
634                // Keep as Position, generator handles it
635                Ok(Expression::Position(f))
636            }
637
638            // ============================================
639            // AGGREGATE FUNCTIONS
640            // ============================================
641            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
642            Expression::CountIf(f) => {
643                let case_expr = Expression::Case(Box::new(Case {
644                    operand: None,
645                    whens: vec![(f.this.clone(), Expression::number(1))],
646                    else_: Some(Expression::number(0)),
647                    comments: Vec::new(),
648                    inferred_type: None,
649                }));
650                Ok(Expression::Sum(Box::new(AggFunc {
651                    ignore_nulls: None,
652                    having_max: None,
653                    this: case_expr,
654                    distinct: f.distinct,
655                    filter: f.filter,
656                    order_by: Vec::new(),
657                    name: None,
658                    limit: None,
659                    inferred_type: None,
660                })))
661            }
662
663            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
664            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
665
666            // Variance -> VAR_SAMP in PostgreSQL
667            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
668                "VAR_SAMP".to_string(),
669                vec![f.this],
670            )))),
671
672            // VarPop -> VAR_POP in PostgreSQL
673            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
674                "VAR_POP".to_string(),
675                vec![f.this],
676            )))),
677
678            // ============================================
679            // JSON FUNCTIONS
680            // ============================================
681            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
682            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
683            Expression::JsonExtract(mut f) => {
684                // Only use arrow syntax for simple literal paths (string or non-negative number)
685                // Complex expressions like column references should use function form
686                f.arrow_syntax = Self::is_simple_json_path(&f.path);
687                Ok(Expression::JsonExtract(f))
688            }
689
690            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
691            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
692            // #>> (hash_arrow_syntax) stays as #>>
693            Expression::JsonExtractScalar(mut f) => {
694                if !f.hash_arrow_syntax {
695                    // Only use arrow syntax for simple literal paths (string or non-negative number)
696                    // Complex expressions like Neg(-1) should use function form
697                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
698                }
699                Ok(Expression::JsonExtractScalar(f))
700            }
701
702            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
703
704            // JSONObjectAgg -> JSON_OBJECT_AGG
705            Expression::JsonObjectAgg(f) => {
706                // JsonObjectAggFunc has key and value as Expression, not Option
707                let args = vec![f.key, f.value];
708                Ok(Expression::Function(Box::new(Function::new(
709                    "JSON_OBJECT_AGG".to_string(),
710                    args,
711                ))))
712            }
713
714            // JSONArrayAgg -> JSON_AGG
715            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
716                "JSON_AGG".to_string(),
717                vec![f.this],
718            )))),
719
720            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
721            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
722                String::new(),
723            )))),
724
725            // ============================================
726            // MISC FUNCTIONS
727            // ============================================
728            // IntDiv -> DIV in PostgreSQL
729            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
730                "DIV".to_string(),
731                vec![f.this, f.expression],
732            )))),
733
734            // Unicode -> ASCII in PostgreSQL
735            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
736                "ASCII".to_string(),
737                vec![f.this],
738            )))),
739
740            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
741            Expression::LastDay(f) => {
742                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
743                let truncated = Expression::Function(Box::new(Function::new(
744                    "DATE_TRUNC".to_string(),
745                    vec![Expression::string("month"), f.this.clone()],
746                )));
747                let plus_month = Expression::Add(Box::new(BinaryOp {
748                    left: truncated,
749                    right: Expression::Interval(Box::new(Interval {
750                        this: Some(Expression::string("1")),
751                        unit: Some(IntervalUnitSpec::Simple {
752                            unit: IntervalUnit::Month,
753                            use_plural: false,
754                        }),
755                    })),
756                    left_comments: Vec::new(),
757                    operator_comments: Vec::new(),
758                    trailing_comments: Vec::new(),
759                    inferred_type: None,
760                }));
761                let minus_day = Expression::Sub(Box::new(BinaryOp {
762                    left: plus_month,
763                    right: Expression::Interval(Box::new(Interval {
764                        this: Some(Expression::string("1")),
765                        unit: Some(IntervalUnitSpec::Simple {
766                            unit: IntervalUnit::Day,
767                            use_plural: false,
768                        }),
769                    })),
770                    left_comments: Vec::new(),
771                    operator_comments: Vec::new(),
772                    trailing_comments: Vec::new(),
773                    inferred_type: None,
774                }));
775                Ok(Expression::Cast(Box::new(Cast {
776                    this: minus_day,
777                    to: DataType::Date,
778                    trailing_comments: Vec::new(),
779                    double_colon_syntax: true, // Use PostgreSQL :: syntax
780                    format: None,
781                    default: None,
782                    inferred_type: None,
783                })))
784            }
785
786            // GenerateSeries is native to PostgreSQL
787            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
788
789            // ExplodingGenerateSeries -> GENERATE_SERIES
790            Expression::ExplodingGenerateSeries(f) => {
791                let mut args = vec![f.start, f.stop];
792                if let Some(step) = f.step {
793                    args.push(step); // step is Expression, not Box<Expression>
794                }
795                Ok(Expression::Function(Box::new(Function::new(
796                    "GENERATE_SERIES".to_string(),
797                    args,
798                ))))
799            }
800
801            // ============================================
802            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
803            // ============================================
804            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
805            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
806                name: "CURRENT_TIMESTAMP".to_string(),
807                args: vec![],
808                distinct: false,
809                trailing_comments: vec![],
810                use_bracket_syntax: false,
811                no_parens: true,
812                quoted: false,
813                span: None,
814                inferred_type: None,
815            }))),
816
817            // CurrentUser -> CURRENT_USER (no parens)
818            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
819                "CURRENT_USER".to_string(),
820                vec![],
821            )))),
822
823            // CurrentDate -> CURRENT_DATE (no parens)
824            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
825                name: "CURRENT_DATE".to_string(),
826                args: vec![],
827                distinct: false,
828                trailing_comments: vec![],
829                use_bracket_syntax: false,
830                no_parens: true,
831                quoted: false,
832                span: None,
833                inferred_type: None,
834            }))),
835
836            // ============================================
837            // JOIN TRANSFORMATIONS
838            // ============================================
839            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
840            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
841                Ok(Expression::Join(Box::new(Join {
842                    this: join.this,
843                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
844                    using: join.using,
845                    kind: JoinKind::CrossApply,
846                    use_inner_keyword: false,
847                    use_outer_keyword: false,
848                    deferred_condition: false,
849                    join_hint: None,
850                    match_condition: None,
851                    pivots: join.pivots,
852                    comments: join.comments,
853                    nesting_group: 0,
854                    directed: false,
855                })))
856            }
857
858            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
859            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
860                Ok(Expression::Join(Box::new(Join {
861                    this: join.this,
862                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
863                    using: join.using,
864                    kind: JoinKind::OuterApply,
865                    use_inner_keyword: false,
866                    use_outer_keyword: false,
867                    deferred_condition: false,
868                    join_hint: None,
869                    match_condition: None,
870                    pivots: join.pivots,
871                    comments: join.comments,
872                    nesting_group: 0,
873                    directed: false,
874                })))
875            }
876
877            // ============================================
878            // GENERIC FUNCTION TRANSFORMATIONS
879            // ============================================
880            Expression::Function(f) => self.transform_function(*f),
881
882            // Generic aggregate function transformations
883            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
884
885            // ===== Context-aware JSON arrow wrapping =====
886            // When JSON arrow expressions appear in Binary/In/Not contexts,
887            // they need to be wrapped in parentheses for correct precedence.
888            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
889
890            // Binary operators that need JSON wrapping
891            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
892                left: wrap_if_json_arrow(op.left),
893                right: wrap_if_json_arrow(op.right),
894                ..*op
895            }))),
896            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
897                left: wrap_if_json_arrow(op.left),
898                right: wrap_if_json_arrow(op.right),
899                ..*op
900            }))),
901            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
902                left: wrap_if_json_arrow(op.left),
903                right: wrap_if_json_arrow(op.right),
904                ..*op
905            }))),
906            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
907                left: wrap_if_json_arrow(op.left),
908                right: wrap_if_json_arrow(op.right),
909                ..*op
910            }))),
911            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
912                left: wrap_if_json_arrow(op.left),
913                right: wrap_if_json_arrow(op.right),
914                ..*op
915            }))),
916            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
917                left: wrap_if_json_arrow(op.left),
918                right: wrap_if_json_arrow(op.right),
919                ..*op
920            }))),
921
922            // In expression - wrap the this part if it's JSON arrow
923            Expression::In(mut i) => {
924                i.this = wrap_if_json_arrow(i.this);
925                Ok(Expression::In(i))
926            }
927
928            // Not expression - wrap the this part if it's JSON arrow
929            Expression::Not(mut n) => {
930                n.this = wrap_if_json_arrow(n.this);
931                Ok(Expression::Not(n))
932            }
933
934            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
935            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
936            Expression::Merge(m) => Ok(Expression::Merge(m)),
937
938            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
939            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
940                // Convert path from bracketed format to simple key
941                // e.g., '["fr''uit"]' -> 'fr''uit'
942                let path = match *je.expression {
943                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
944                        let Literal::String(s) = lit.as_ref() else {
945                            unreachable!()
946                        };
947                        // Strip bracketed JSON path format: ["key"] -> key
948                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
949                            s[2..s.len() - 2].to_string()
950                        } else {
951                            s.clone()
952                        };
953                        Expression::Literal(Box::new(Literal::String(cleaned)))
954                    }
955                    other => other,
956                };
957                Ok(Expression::Function(Box::new(Function::new(
958                    "JSON_EXTRACT_PATH".to_string(),
959                    vec![*je.this, path],
960                ))))
961            }
962
963            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
964            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
965                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
966                    this: t.this,
967                    characters: t.characters,
968                    position: t.position,
969                    sql_standard_syntax: true,
970                    position_explicit: t.position_explicit,
971                })))
972            }
973
974            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
975            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
976                let Literal::ByteString(s) = lit.as_ref() else {
977                    unreachable!()
978                };
979                Ok(Expression::Cast(Box::new(Cast {
980                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
981                    to: DataType::VarBinary { length: None },
982                    trailing_comments: Vec::new(),
983                    double_colon_syntax: false,
984                    format: None,
985                    default: None,
986                    inferred_type: None,
987                })))
988            }
989
990            // Pass through everything else
991            _ => Ok(expr),
992        }
993    }
994}
995
996#[cfg(feature = "transpile")]
997impl PostgresDialect {
998    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
999    /// Simple paths can use arrow syntax (->>) in PostgreSQL
1000    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
1001    fn is_simple_json_path(path: &Expression) -> bool {
1002        match path {
1003            // String literals are always simple
1004            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
1005            // Non-negative integer literals are simple
1006            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1007                let Literal::Number(n) = lit.as_ref() else {
1008                    unreachable!()
1009                };
1010                // Check if it's non-negative
1011                !n.starts_with('-')
1012            }
1013            // JSONPath expressions are simple (they're already parsed paths)
1014            Expression::JSONPath(_) => true,
1015            // Everything else (Neg, function calls, etc.) is complex
1016            _ => false,
1017        }
1018    }
1019
1020    /// Transform data types according to PostgreSQL TYPE_MAPPING
1021    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1022        let transformed = match dt {
1023            // TINYINT -> SMALLINT
1024            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1025
1026            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1027            DataType::Float { .. } => DataType::Custom {
1028                name: "DOUBLE PRECISION".to_string(),
1029            },
1030
1031            // DOUBLE -> DOUBLE PRECISION
1032            DataType::Double { .. } => DataType::Custom {
1033                name: "DOUBLE PRECISION".to_string(),
1034            },
1035
1036            // BINARY -> BYTEA (handled by generator which preserves length)
1037            DataType::Binary { .. } => dt,
1038
1039            // VARBINARY -> BYTEA (handled by generator which preserves length)
1040            DataType::VarBinary { .. } => dt,
1041
1042            // BLOB -> BYTEA
1043            DataType::Blob => DataType::Custom {
1044                name: "BYTEA".to_string(),
1045            },
1046
1047            // Custom type normalizations
1048            DataType::Custom { ref name } => {
1049                let upper = name.to_uppercase();
1050                match upper.as_str() {
1051                    // INT8 -> BIGINT (PostgreSQL alias)
1052                    "INT8" => DataType::BigInt { length: None },
1053                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1054                    "FLOAT8" => DataType::Custom {
1055                        name: "DOUBLE PRECISION".to_string(),
1056                    },
1057                    // FLOAT4 -> REAL (PostgreSQL alias)
1058                    "FLOAT4" => DataType::Custom {
1059                        name: "REAL".to_string(),
1060                    },
1061                    // INT4 -> INTEGER (PostgreSQL alias)
1062                    "INT4" => DataType::Int {
1063                        length: None,
1064                        integer_spelling: false,
1065                    },
1066                    // INT2 -> SMALLINT (PostgreSQL alias)
1067                    "INT2" => DataType::SmallInt { length: None },
1068                    _ => dt,
1069                }
1070            }
1071
1072            // Keep all other types as-is
1073            other => other,
1074        };
1075        Ok(Expression::DataType(transformed))
1076    }
1077
1078    fn transform_function(&self, f: Function) -> Result<Expression> {
1079        let name_upper = f.name.to_uppercase();
1080        match name_upper.as_str() {
1081            // IFNULL -> COALESCE
1082            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1083                original_name: None,
1084                expressions: f.args,
1085                inferred_type: None,
1086            }))),
1087
1088            // NVL -> COALESCE
1089            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1090                original_name: None,
1091                expressions: f.args,
1092                inferred_type: None,
1093            }))),
1094
1095            // ISNULL (SQL Server) -> COALESCE
1096            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1097                original_name: None,
1098                expressions: f.args,
1099                inferred_type: None,
1100            }))),
1101
1102            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1103            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1104                Function::new("STRING_AGG".to_string(), f.args),
1105            ))),
1106
1107            // SUBSTR -> SUBSTRING (standard SQL)
1108            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1109                "SUBSTRING".to_string(),
1110                f.args,
1111            )))),
1112
1113            // RAND -> RANDOM in PostgreSQL
1114            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1115
1116            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1117            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1118                this: f.args.into_iter().next().unwrap(),
1119                decimals: None,
1120                to: None,
1121            }))),
1122
1123            // LEN -> LENGTH in PostgreSQL
1124            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1125                this: f.args.into_iter().next().unwrap(),
1126                original_name: None,
1127                inferred_type: None,
1128            }))),
1129
1130            // CHAR_LENGTH -> LENGTH in PostgreSQL
1131            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1132                this: f.args.into_iter().next().unwrap(),
1133                original_name: None,
1134                inferred_type: None,
1135            }))),
1136
1137            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1138            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1139                Ok(Expression::Length(Box::new(UnaryFunc {
1140                    this: f.args.into_iter().next().unwrap(),
1141                    original_name: None,
1142                    inferred_type: None,
1143                })))
1144            }
1145
1146            // CHARINDEX -> POSITION in PostgreSQL
1147            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1148            "CHARINDEX" if f.args.len() >= 2 => {
1149                let mut args = f.args;
1150                let substring = args.remove(0);
1151                let string = args.remove(0);
1152                Ok(Expression::Position(Box::new(
1153                    crate::expressions::PositionFunc {
1154                        substring,
1155                        string,
1156                        start: args.pop(),
1157                    },
1158                )))
1159            }
1160
1161            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1162            "GETDATE" => Ok(Expression::CurrentTimestamp(
1163                crate::expressions::CurrentTimestamp {
1164                    precision: None,
1165                    sysdate: false,
1166                },
1167            )),
1168
1169            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1170            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1171                crate::expressions::CurrentTimestamp {
1172                    precision: None,
1173                    sysdate: false,
1174                },
1175            )),
1176
1177            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1178            "NOW" => Ok(Expression::CurrentTimestamp(
1179                crate::expressions::CurrentTimestamp {
1180                    precision: None,
1181                    sysdate: false,
1182                },
1183            )),
1184
1185            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1186            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1187                "GEN_RANDOM_UUID".to_string(),
1188                vec![],
1189            )))),
1190
1191            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1192            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1193                "GEN_RANDOM_UUID".to_string(),
1194                vec![],
1195            )))),
1196
1197            // UNNEST is native to PostgreSQL
1198            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1199
1200            // GENERATE_SERIES is native to PostgreSQL
1201            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1202
1203            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1204            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1205                "SHA256".to_string(),
1206                f.args,
1207            )))),
1208
1209            // SHA2 -> SHA256/SHA512 based on length argument
1210            "SHA2" if f.args.len() == 2 => {
1211                // SHA2(data, length) -> SHA256/SHA384/SHA512
1212                let args = f.args;
1213                let data = args[0].clone();
1214                // Default to SHA256 - would need runtime inspection for exact mapping
1215                Ok(Expression::Function(Box::new(Function::new(
1216                    "SHA256".to_string(),
1217                    vec![data],
1218                ))))
1219            }
1220
1221            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1222            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1223
1224            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1225            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1226                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1227            ))),
1228            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1229                Function::new("LEVENSHTEIN".to_string(), f.args),
1230            ))),
1231
1232            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1233            "TRIM" if f.args.len() == 2 => {
1234                let value = f.args[0].clone();
1235                let chars = f.args[1].clone();
1236                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1237                    this: value,
1238                    characters: Some(chars),
1239                    position: crate::expressions::TrimPosition::Both,
1240                    sql_standard_syntax: true,
1241                    position_explicit: false,
1242                })))
1243            }
1244
1245            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1246            "DATEDIFF" if f.args.len() >= 2 => {
1247                let mut args = f.args;
1248                if args.len() == 2 {
1249                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1250                    let first = args.remove(0);
1251                    let second = args.remove(0);
1252                    Ok(Expression::Function(Box::new(Function::new(
1253                        "AGE".to_string(),
1254                        vec![first, second],
1255                    ))))
1256                } else {
1257                    // 3-arg form: DATEDIFF(unit, start, end)
1258                    let unit_expr = args.remove(0);
1259                    let start = args.remove(0);
1260                    let end_expr = args.remove(0);
1261
1262                    // Extract unit name from identifier or column
1263                    let unit_name = match &unit_expr {
1264                        Expression::Identifier(id) => id.name.to_uppercase(),
1265                        Expression::Var(v) => v.this.to_uppercase(),
1266                        Expression::Column(col) if col.table.is_none() => {
1267                            col.name.name.to_uppercase()
1268                        }
1269                        _ => "DAY".to_string(),
1270                    };
1271
1272                    // Helper: CAST(expr AS TIMESTAMP)
1273                    let cast_ts = |e: Expression| -> Expression {
1274                        Expression::Cast(Box::new(Cast {
1275                            this: e,
1276                            to: DataType::Timestamp {
1277                                precision: None,
1278                                timezone: false,
1279                            },
1280                            trailing_comments: Vec::new(),
1281                            double_colon_syntax: false,
1282                            format: None,
1283                            default: None,
1284                            inferred_type: None,
1285                        }))
1286                    };
1287
1288                    // Helper: CAST(expr AS BIGINT)
1289                    let cast_bigint = |e: Expression| -> Expression {
1290                        Expression::Cast(Box::new(Cast {
1291                            this: e,
1292                            to: DataType::BigInt { length: None },
1293                            trailing_comments: Vec::new(),
1294                            double_colon_syntax: false,
1295                            format: None,
1296                            default: None,
1297                            inferred_type: None,
1298                        }))
1299                    };
1300
1301                    let end_ts = cast_ts(end_expr.clone());
1302                    let start_ts = cast_ts(start.clone());
1303
1304                    // Helper: end_ts - start_ts
1305                    let ts_diff = || -> Expression {
1306                        Expression::Sub(Box::new(BinaryOp::new(
1307                            cast_ts(end_expr.clone()),
1308                            cast_ts(start.clone()),
1309                        )))
1310                    };
1311
1312                    // Helper: AGE(end_ts, start_ts)
1313                    let age_call = || -> Expression {
1314                        Expression::Function(Box::new(Function::new(
1315                            "AGE".to_string(),
1316                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1317                        )))
1318                    };
1319
1320                    // Helper: EXTRACT(field FROM expr)
1321                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1322                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1323                    };
1324
1325                    // Helper: number literal
1326                    let num = |n: i64| -> Expression {
1327                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1328                    };
1329
1330                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1331                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1332
1333                    let result = match unit_name.as_str() {
1334                        "MICROSECOND" => {
1335                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1336                            let epoch = extract(epoch_field, ts_diff());
1337                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1338                                epoch,
1339                                num(1000000),
1340                            ))))
1341                        }
1342                        "MILLISECOND" => {
1343                            let epoch = extract(epoch_field, ts_diff());
1344                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1345                        }
1346                        "SECOND" => {
1347                            let epoch = extract(epoch_field, ts_diff());
1348                            cast_bigint(epoch)
1349                        }
1350                        "MINUTE" => {
1351                            let epoch = extract(epoch_field, ts_diff());
1352                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1353                        }
1354                        "HOUR" => {
1355                            let epoch = extract(epoch_field, ts_diff());
1356                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1357                        }
1358                        "DAY" => {
1359                            let epoch = extract(epoch_field, ts_diff());
1360                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1361                        }
1362                        "WEEK" => {
1363                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1364                            let diff_parens = Expression::Paren(Box::new(Paren {
1365                                this: ts_diff(),
1366                                trailing_comments: Vec::new(),
1367                            }));
1368                            let days =
1369                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1370                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1371                        }
1372                        "MONTH" => {
1373                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1374                            let year_part =
1375                                extract(DateTimeField::Custom("year".to_string()), age_call());
1376                            let month_part =
1377                                extract(DateTimeField::Custom("month".to_string()), age_call());
1378                            let year_months =
1379                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1380                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1381                                year_months,
1382                                month_part,
1383                            ))))
1384                        }
1385                        "QUARTER" => {
1386                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1387                            let year_part =
1388                                extract(DateTimeField::Custom("year".to_string()), age_call());
1389                            let month_part =
1390                                extract(DateTimeField::Custom("month".to_string()), age_call());
1391                            let year_quarters =
1392                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1393                            let month_quarters =
1394                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1395                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1396                                year_quarters,
1397                                month_quarters,
1398                            ))))
1399                        }
1400                        "YEAR" => {
1401                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1402                            cast_bigint(extract(
1403                                DateTimeField::Custom("year".to_string()),
1404                                age_call(),
1405                            ))
1406                        }
1407                        _ => {
1408                            // Fallback: simple AGE
1409                            Expression::Function(Box::new(Function::new(
1410                                "AGE".to_string(),
1411                                vec![end_ts, start_ts],
1412                            )))
1413                        }
1414                    };
1415                    Ok(result)
1416                }
1417            }
1418
1419            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1420            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1421                let mut args = f.args;
1422                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1423                let start = args.remove(0);
1424                let end = args.remove(0);
1425                Ok(Expression::Function(Box::new(Function::new(
1426                    "AGE".to_string(),
1427                    vec![end, start],
1428                ))))
1429            }
1430
1431            // FROM_UNIXTIME -> TO_TIMESTAMP
1432            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1433                "TO_TIMESTAMP".to_string(),
1434                f.args,
1435            )))),
1436
1437            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1438            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1439                let arg = f.args.into_iter().next().unwrap();
1440                Ok(Expression::Function(Box::new(Function::new(
1441                    "DATE_PART".to_string(),
1442                    vec![Expression::string("epoch"), arg],
1443                ))))
1444            }
1445
1446            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1447            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1448                Ok(Expression::Function(Box::new(Function::new(
1449                    "DATE_PART".to_string(),
1450                    vec![
1451                        Expression::string("epoch"),
1452                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1453                            precision: None,
1454                            sysdate: false,
1455                        }),
1456                    ],
1457                ))))
1458            }
1459
1460            // DATEADD -> date + interval pattern
1461            "DATEADD" if f.args.len() == 3 => {
1462                // DATEADD(unit, count, date) -> date + interval 'count unit'
1463                // This is a simplified version - full impl would construct proper interval
1464                let mut args = f.args;
1465                let _unit = args.remove(0);
1466                let count = args.remove(0);
1467                let date = args.remove(0);
1468                Ok(Expression::Add(Box::new(BinaryOp {
1469                    left: date,
1470                    right: count,
1471                    left_comments: Vec::new(),
1472                    operator_comments: Vec::new(),
1473                    trailing_comments: Vec::new(),
1474                    inferred_type: None,
1475                })))
1476            }
1477
1478            // INSTR -> POSITION (simplified)
1479            "INSTR" if f.args.len() >= 2 => {
1480                let mut args = f.args;
1481                let string = args.remove(0);
1482                let substring = args.remove(0);
1483                Ok(Expression::Position(Box::new(
1484                    crate::expressions::PositionFunc {
1485                        substring,
1486                        string,
1487                        start: args.pop(),
1488                    },
1489                )))
1490            }
1491
1492            // CONCAT_WS is native to PostgreSQL
1493            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1494
1495            // REGEXP_REPLACE: pass through without adding 'g' flag
1496            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1497            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1498                Ok(Expression::Function(Box::new(f)))
1499            }
1500            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1501            // If occurrence is 0 (global), append 'g' to flags
1502            "REGEXP_REPLACE" if f.args.len() == 6 => {
1503                let is_global = match &f.args[4] {
1504                    Expression::Literal(lit)
1505                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1506                    {
1507                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1508                            unreachable!()
1509                        };
1510                        n == "0"
1511                    }
1512                    _ => false,
1513                };
1514                if is_global {
1515                    let subject = f.args[0].clone();
1516                    let pattern = f.args[1].clone();
1517                    let replacement = f.args[2].clone();
1518                    let position = f.args[3].clone();
1519                    let occurrence = f.args[4].clone();
1520                    let params = &f.args[5];
1521                    let mut flags = if let Expression::Literal(lit) = params {
1522                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1523                            s.clone()
1524                        } else {
1525                            String::new()
1526                        }
1527                    } else {
1528                        String::new()
1529                    };
1530                    if !flags.contains('g') {
1531                        flags.push('g');
1532                    }
1533                    Ok(Expression::Function(Box::new(Function::new(
1534                        "REGEXP_REPLACE".to_string(),
1535                        vec![
1536                            subject,
1537                            pattern,
1538                            replacement,
1539                            position,
1540                            occurrence,
1541                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1542                                flags,
1543                            ))),
1544                        ],
1545                    ))))
1546                } else {
1547                    Ok(Expression::Function(Box::new(f)))
1548                }
1549            }
1550            // Default: pass through
1551            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1552
1553            // Pass through everything else
1554            _ => Ok(Expression::Function(Box::new(f))),
1555        }
1556    }
1557
1558    fn transform_aggregate_function(
1559        &self,
1560        f: Box<crate::expressions::AggregateFunction>,
1561    ) -> Result<Expression> {
1562        let name_upper = f.name.to_uppercase();
1563        match name_upper.as_str() {
1564            // COUNT_IF -> SUM(CASE WHEN...)
1565            "COUNT_IF" if !f.args.is_empty() => {
1566                let condition = f.args.into_iter().next().unwrap();
1567                let case_expr = Expression::Case(Box::new(Case {
1568                    operand: None,
1569                    whens: vec![(condition, Expression::number(1))],
1570                    else_: Some(Expression::number(0)),
1571                    comments: Vec::new(),
1572                    inferred_type: None,
1573                }));
1574                Ok(Expression::Sum(Box::new(AggFunc {
1575                    ignore_nulls: None,
1576                    having_max: None,
1577                    this: case_expr,
1578                    distinct: f.distinct,
1579                    filter: f.filter,
1580                    order_by: Vec::new(),
1581                    name: None,
1582                    limit: None,
1583                    inferred_type: None,
1584                })))
1585            }
1586
1587            // GROUP_CONCAT -> STRING_AGG
1588            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1589                Function::new("STRING_AGG".to_string(), f.args),
1590            ))),
1591
1592            // STDEV -> STDDEV in PostgreSQL
1593            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1594                ignore_nulls: None,
1595                having_max: None,
1596                this: f.args.into_iter().next().unwrap(),
1597                distinct: f.distinct,
1598                filter: f.filter,
1599                order_by: Vec::new(),
1600                name: None,
1601                limit: None,
1602                inferred_type: None,
1603            }))),
1604
1605            // STDEVP -> STDDEV_POP in PostgreSQL
1606            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1607                ignore_nulls: None,
1608                having_max: None,
1609                this: f.args.into_iter().next().unwrap(),
1610                distinct: f.distinct,
1611                filter: f.filter,
1612                order_by: Vec::new(),
1613                name: None,
1614                limit: None,
1615                inferred_type: None,
1616            }))),
1617
1618            // VAR -> VAR_SAMP in PostgreSQL
1619            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1620                ignore_nulls: None,
1621                having_max: None,
1622                this: f.args.into_iter().next().unwrap(),
1623                distinct: f.distinct,
1624                filter: f.filter,
1625                order_by: Vec::new(),
1626                name: None,
1627                limit: None,
1628                inferred_type: None,
1629            }))),
1630
1631            // VARP -> VAR_POP in PostgreSQL
1632            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1633                ignore_nulls: None,
1634                having_max: None,
1635                this: f.args.into_iter().next().unwrap(),
1636                distinct: f.distinct,
1637                filter: f.filter,
1638                order_by: Vec::new(),
1639                name: None,
1640                limit: None,
1641                inferred_type: None,
1642            }))),
1643
1644            // BIT_AND is native to PostgreSQL
1645            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1646
1647            // BIT_OR is native to PostgreSQL
1648            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1649
1650            // BIT_XOR is native to PostgreSQL
1651            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1652
1653            // BOOL_AND is native to PostgreSQL
1654            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1655
1656            // BOOL_OR is native to PostgreSQL
1657            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1658
1659            // VARIANCE -> VAR_SAMP in PostgreSQL
1660            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1661                ignore_nulls: None,
1662                having_max: None,
1663                this: f.args.into_iter().next().unwrap(),
1664                distinct: f.distinct,
1665                filter: f.filter,
1666                order_by: Vec::new(),
1667                name: None,
1668                limit: None,
1669                inferred_type: None,
1670            }))),
1671
1672            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1673            "LOGICAL_OR" if !f.args.is_empty() => {
1674                let mut new_agg = f.clone();
1675                new_agg.name = "BOOL_OR".to_string();
1676                Ok(Expression::AggregateFunction(new_agg))
1677            }
1678
1679            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1680            "LOGICAL_AND" if !f.args.is_empty() => {
1681                let mut new_agg = f.clone();
1682                new_agg.name = "BOOL_AND".to_string();
1683                Ok(Expression::AggregateFunction(new_agg))
1684            }
1685
1686            // Pass through everything else
1687            _ => Ok(Expression::AggregateFunction(f)),
1688        }
1689    }
1690}
1691
1692#[cfg(test)]
1693mod tests {
1694    use super::*;
1695    use crate::dialects::Dialect;
1696
1697    fn transpile_to_postgres(sql: &str) -> String {
1698        let dialect = Dialect::get(DialectType::Generic);
1699        let result = dialect
1700            .transpile(sql, DialectType::PostgreSQL)
1701            .expect("Transpile failed");
1702        result[0].clone()
1703    }
1704
1705    #[test]
1706    fn test_ifnull_to_coalesce() {
1707        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1708        assert!(
1709            result.contains("COALESCE"),
1710            "Expected COALESCE, got: {}",
1711            result
1712        );
1713    }
1714
1715    #[test]
1716    fn test_nvl_to_coalesce() {
1717        let result = transpile_to_postgres("SELECT NVL(a, b)");
1718        assert!(
1719            result.contains("COALESCE"),
1720            "Expected COALESCE, got: {}",
1721            result
1722        );
1723    }
1724
1725    #[test]
1726    fn test_rand_to_random() {
1727        let result = transpile_to_postgres("SELECT RAND()");
1728        assert!(
1729            result.contains("RANDOM"),
1730            "Expected RANDOM, got: {}",
1731            result
1732        );
1733    }
1734
1735    #[test]
1736    fn test_basic_select() {
1737        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1738        assert!(result.contains("SELECT"));
1739        assert!(result.contains("FROM users"));
1740    }
1741
1742    #[test]
1743    fn test_len_to_length() {
1744        let result = transpile_to_postgres("SELECT LEN(name)");
1745        assert!(
1746            result.contains("LENGTH"),
1747            "Expected LENGTH, got: {}",
1748            result
1749        );
1750    }
1751
1752    #[test]
1753    fn test_getdate_to_current_timestamp() {
1754        let result = transpile_to_postgres("SELECT GETDATE()");
1755        assert!(
1756            result.contains("CURRENT_TIMESTAMP"),
1757            "Expected CURRENT_TIMESTAMP, got: {}",
1758            result
1759        );
1760    }
1761
1762    #[test]
1763    fn test_substr_to_substring() {
1764        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1765        assert!(
1766            result.contains("SUBSTRING"),
1767            "Expected SUBSTRING, got: {}",
1768            result
1769        );
1770    }
1771
1772    #[test]
1773    fn test_group_concat_to_string_agg() {
1774        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1775        assert!(
1776            result.contains("STRING_AGG"),
1777            "Expected STRING_AGG, got: {}",
1778            result
1779        );
1780    }
1781
1782    #[test]
1783    fn test_double_quote_identifiers() {
1784        // PostgreSQL uses double quotes for identifiers
1785        let dialect = PostgresDialect;
1786        let config = dialect.generator_config();
1787        assert_eq!(config.identifier_quote, '"');
1788    }
1789
1790    #[test]
1791    fn test_char_length_to_length() {
1792        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1793        assert!(
1794            result.contains("LENGTH"),
1795            "Expected LENGTH, got: {}",
1796            result
1797        );
1798    }
1799
1800    #[test]
1801    fn test_character_length_to_length() {
1802        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1803        assert!(
1804            result.contains("LENGTH"),
1805            "Expected LENGTH, got: {}",
1806            result
1807        );
1808    }
1809
1810    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1811    fn identity_postgres(sql: &str) -> String {
1812        let dialect = Dialect::get(DialectType::PostgreSQL);
1813        let exprs = dialect.parse(sql).expect("Parse failed");
1814        let transformed = dialect
1815            .transform(exprs[0].clone())
1816            .expect("Transform failed");
1817        dialect.generate(&transformed).expect("Generate failed")
1818    }
1819
1820    #[test]
1821    fn test_json_extract_with_column_path() {
1822        // When the path is a column reference (not a literal), should use function form
1823        let result = identity_postgres("json_data.data -> field_ids.field_id");
1824        assert!(
1825            result.contains("JSON_EXTRACT_PATH"),
1826            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1827            result
1828        );
1829    }
1830
1831    #[test]
1832    fn test_json_extract_scalar_with_negative_index() {
1833        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1834        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1835        assert!(
1836            result.contains("JSON_EXTRACT_PATH_TEXT"),
1837            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1838            result
1839        );
1840        // The first -> should still be arrow syntax since 'duration' is a string literal
1841        assert!(
1842            result.contains("->"),
1843            "Expected -> for string literal path, got: {}",
1844            result
1845        );
1846    }
1847
1848    #[test]
1849    fn test_json_extract_with_string_literal() {
1850        // When the path is a string literal, should keep arrow syntax
1851        let result = identity_postgres("data -> 'key'");
1852        assert!(
1853            result.contains("->"),
1854            "Expected -> for string literal path, got: {}",
1855            result
1856        );
1857        assert!(
1858            !result.contains("JSON_EXTRACT_PATH"),
1859            "Should NOT use function form for string literal, got: {}",
1860            result
1861        );
1862    }
1863}