Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145                inferred_type: None,
146            }))),
147
148            // NVL -> COALESCE in PostgreSQL
149            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
150                original_name: None,
151                expressions: vec![f.this, f.expression],
152                inferred_type: None,
153            }))),
154
155            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
156            // so it outputs as COALESCE instead of the original function name
157            Expression::Coalesce(mut f) => {
158                f.original_name = None;
159                Ok(Expression::Coalesce(f))
160            }
161
162            // ============================================
163            // CAST OPERATIONS
164            // ============================================
165            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
166            Expression::TryCast(c) => Ok(Expression::Cast(c)),
167
168            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
169            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
170
171            // ============================================
172            // RANDOM
173            // ============================================
174            // RAND -> RANDOM in PostgreSQL
175            Expression::Rand(r) => {
176                // PostgreSQL's RANDOM() doesn't take a seed argument
177                let _ = r.seed; // Ignore seed
178                Ok(Expression::Random(crate::expressions::Random))
179            }
180
181            // ============================================
182            // UUID
183            // ============================================
184            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
185            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
186                "GEN_RANDOM_UUID".to_string(),
187                vec![],
188            )))),
189
190            // ============================================
191            // ARRAY OPERATIONS
192            // ============================================
193            // EXPLODE -> UNNEST in PostgreSQL
194            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
195                crate::expressions::UnnestFunc {
196                    this: f.this,
197                    expressions: Vec::new(),
198                    with_ordinality: false,
199                    alias: None,
200                    offset_alias: None,
201                },
202            ))),
203
204            // ExplodeOuter -> UNNEST in PostgreSQL
205            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
206                crate::expressions::UnnestFunc {
207                    this: f.this,
208                    expressions: Vec::new(),
209                    with_ordinality: false,
210                    alias: None,
211                    offset_alias: None,
212                },
213            ))),
214
215            // ArrayConcat -> ARRAY_CAT in PostgreSQL
216            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
217                "ARRAY_CAT".to_string(),
218                f.expressions,
219            )))),
220
221            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
222            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
223                "ARRAY_PREPEND".to_string(),
224                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
225            )))),
226
227            // ============================================
228            // BITWISE OPERATIONS
229            // ============================================
230            // BitwiseXor -> # operator in PostgreSQL
231            Expression::BitwiseXor(f) => {
232                // Use a special marker that generator will recognize
233                Ok(Expression::Function(Box::new(Function::new(
234                    "__PG_BITWISE_XOR__".to_string(),
235                    vec![f.left, f.right],
236                ))))
237            }
238
239            // BitwiseAndAgg -> BIT_AND
240            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
241                "BIT_AND".to_string(),
242                vec![f.this],
243            )))),
244
245            // BitwiseOrAgg -> BIT_OR
246            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
247                "BIT_OR".to_string(),
248                vec![f.this],
249            )))),
250
251            // BitwiseXorAgg -> BIT_XOR
252            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
253                "BIT_XOR".to_string(),
254                vec![f.this],
255            )))),
256
257            // ============================================
258            // BOOLEAN AGGREGATES
259            // ============================================
260            // LogicalAnd -> BOOL_AND
261            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
262                "BOOL_AND".to_string(),
263                vec![f.this],
264            )))),
265
266            // LogicalOr -> BOOL_OR
267            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
268                "BOOL_OR".to_string(),
269                vec![f.this],
270            )))),
271
272            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
273            Expression::Xor(f) => {
274                if let (Some(a), Some(b)) = (f.this, f.expression) {
275                    Ok(Expression::Neq(Box::new(BinaryOp {
276                        left: *a,
277                        right: *b,
278                        left_comments: Vec::new(),
279                        operator_comments: Vec::new(),
280                        trailing_comments: Vec::new(),
281                        inferred_type: None,
282                    })))
283                } else {
284                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
285                }
286            }
287
288            // ============================================
289            // ARRAY OPERATORS
290            // ============================================
291            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
292            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
293            Expression::ArrayContainedBy(op) => {
294                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
295                    left: op.right,
296                    right: op.left,
297                    left_comments: Vec::new(),
298                    operator_comments: Vec::new(),
299                    trailing_comments: Vec::new(),
300                    inferred_type: None,
301                })))
302            }
303
304            // ============================================
305            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
306            // ============================================
307            // RegexpLike -> keep as-is, generator handles ~ operator output
308            Expression::RegexpLike(f) => {
309                // Generator will output as: expr ~ pattern
310                Ok(Expression::RegexpLike(f))
311            }
312
313            // ============================================
314            // DATE/TIME FUNCTIONS
315            // ============================================
316            // DateAdd -> date + INTERVAL in PostgreSQL
317            Expression::DateAdd(f) => {
318                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
319                let right_expr = if is_literal {
320                    // Literal value: INTERVAL 'value' unit
321                    Expression::Interval(Box::new(Interval {
322                        this: Some(f.interval),
323                        unit: Some(IntervalUnitSpec::Simple {
324                            unit: f.unit,
325                            use_plural: false,
326                        }),
327                    }))
328                } else {
329                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
330                    let unit_str = match f.unit {
331                        IntervalUnit::Year => "YEAR",
332                        IntervalUnit::Quarter => "QUARTER",
333                        IntervalUnit::Month => "MONTH",
334                        IntervalUnit::Week => "WEEK",
335                        IntervalUnit::Day => "DAY",
336                        IntervalUnit::Hour => "HOUR",
337                        IntervalUnit::Minute => "MINUTE",
338                        IntervalUnit::Second => "SECOND",
339                        IntervalUnit::Millisecond => "MILLISECOND",
340                        IntervalUnit::Microsecond => "MICROSECOND",
341                        IntervalUnit::Nanosecond => "NANOSECOND",
342                    };
343                    let interval_one = Expression::Interval(Box::new(Interval {
344                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
345                            "1 {unit_str}"
346                        ))))),
347                        unit: None,
348                    }));
349                    Expression::Mul(Box::new(BinaryOp {
350                        left: interval_one,
351                        right: f.interval,
352                        left_comments: Vec::new(),
353                        operator_comments: Vec::new(),
354                        trailing_comments: Vec::new(),
355                        inferred_type: None,
356                    }))
357                };
358                Ok(Expression::Add(Box::new(BinaryOp {
359                    left: f.this,
360                    right: right_expr,
361                    left_comments: Vec::new(),
362                    operator_comments: Vec::new(),
363                    trailing_comments: Vec::new(),
364                    inferred_type: None,
365                })))
366            }
367
368            // DateSub -> date - INTERVAL in PostgreSQL
369            Expression::DateSub(f) => {
370                let interval_expr = Expression::Interval(Box::new(Interval {
371                    this: Some(f.interval),
372                    unit: Some(IntervalUnitSpec::Simple {
373                        unit: f.unit,
374                        use_plural: false,
375                    }),
376                }));
377                Ok(Expression::Sub(Box::new(BinaryOp {
378                    left: f.this,
379                    right: interval_expr,
380                    left_comments: Vec::new(),
381                    operator_comments: Vec::new(),
382                    trailing_comments: Vec::new(),
383                    inferred_type: None,
384                })))
385            }
386
387            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
388            Expression::DateDiff(f) => {
389                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
390                // matching the 3-arg string-based DATEDIFF handler below
391                let unit = f.unit.unwrap_or(IntervalUnit::Day);
392
393                // Helper: CAST(expr AS TIMESTAMP)
394                let cast_ts = |e: Expression| -> Expression {
395                    Expression::Cast(Box::new(Cast {
396                        this: e,
397                        to: DataType::Timestamp {
398                            precision: None,
399                            timezone: false,
400                        },
401                        trailing_comments: Vec::new(),
402                        double_colon_syntax: false,
403                        format: None,
404                        default: None,
405                        inferred_type: None,
406                    }))
407                };
408
409                // Helper: CAST(expr AS BIGINT)
410                let cast_bigint = |e: Expression| -> Expression {
411                    Expression::Cast(Box::new(Cast {
412                        this: e,
413                        to: DataType::BigInt { length: None },
414                        trailing_comments: Vec::new(),
415                        double_colon_syntax: false,
416                        format: None,
417                        default: None,
418                        inferred_type: None,
419                    }))
420                };
421
422                // Clone end/start for reuse
423                let end_expr = f.this;
424                let start = f.expression;
425
426                // Helper: end_ts - start_ts
427                let ts_diff = || -> Expression {
428                    Expression::Sub(Box::new(BinaryOp::new(
429                        cast_ts(end_expr.clone()),
430                        cast_ts(start.clone()),
431                    )))
432                };
433
434                // Helper: AGE(end_ts, start_ts)
435                let age_call = || -> Expression {
436                    Expression::Function(Box::new(Function::new(
437                        "AGE".to_string(),
438                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
439                    )))
440                };
441
442                // Helper: EXTRACT(field FROM expr)
443                let extract = |field: DateTimeField, from: Expression| -> Expression {
444                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
445                };
446
447                // Helper: number literal
448                let num = |n: i64| -> Expression {
449                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
450                };
451
452                let epoch_field = DateTimeField::Custom("epoch".to_string());
453
454                let result = match unit {
455                    IntervalUnit::Nanosecond => {
456                        let epoch = extract(epoch_field.clone(), ts_diff());
457                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
458                            epoch,
459                            num(1000000000),
460                        ))))
461                    }
462                    IntervalUnit::Microsecond => {
463                        let epoch = extract(epoch_field, ts_diff());
464                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
465                            epoch,
466                            num(1000000),
467                        ))))
468                    }
469                    IntervalUnit::Millisecond => {
470                        let epoch = extract(epoch_field, ts_diff());
471                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
472                    }
473                    IntervalUnit::Second => {
474                        let epoch = extract(epoch_field, ts_diff());
475                        cast_bigint(epoch)
476                    }
477                    IntervalUnit::Minute => {
478                        let epoch = extract(epoch_field, ts_diff());
479                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
480                    }
481                    IntervalUnit::Hour => {
482                        let epoch = extract(epoch_field, ts_diff());
483                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
484                    }
485                    IntervalUnit::Day => {
486                        let epoch = extract(epoch_field, ts_diff());
487                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
488                    }
489                    IntervalUnit::Week => {
490                        let diff_parens = Expression::Paren(Box::new(Paren {
491                            this: ts_diff(),
492                            trailing_comments: Vec::new(),
493                        }));
494                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
495                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
496                    }
497                    IntervalUnit::Month => {
498                        let year_part =
499                            extract(DateTimeField::Custom("year".to_string()), age_call());
500                        let month_part =
501                            extract(DateTimeField::Custom("month".to_string()), age_call());
502                        let year_months =
503                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
504                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
505                            year_months,
506                            month_part,
507                        ))))
508                    }
509                    IntervalUnit::Quarter => {
510                        let year_part =
511                            extract(DateTimeField::Custom("year".to_string()), age_call());
512                        let month_part =
513                            extract(DateTimeField::Custom("month".to_string()), age_call());
514                        let year_quarters =
515                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
516                        let month_quarters =
517                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
518                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
519                            year_quarters,
520                            month_quarters,
521                        ))))
522                    }
523                    IntervalUnit::Year => cast_bigint(extract(
524                        DateTimeField::Custom("year".to_string()),
525                        age_call(),
526                    )),
527                };
528                Ok(result)
529            }
530
531            // UnixToTime -> TO_TIMESTAMP
532            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
533                "TO_TIMESTAMP".to_string(),
534                vec![*f.this],
535            )))),
536
537            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
538            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
539                "DATE_PART".to_string(),
540                vec![Expression::string("epoch"), f.this],
541            )))),
542
543            // StrToTime -> TO_TIMESTAMP in PostgreSQL
544            Expression::ToTimestamp(f) => {
545                let mut args = vec![f.this];
546                if let Some(fmt) = f.format {
547                    args.push(fmt);
548                }
549                Ok(Expression::Function(Box::new(Function::new(
550                    "TO_TIMESTAMP".to_string(),
551                    args,
552                ))))
553            }
554
555            // StrToDate -> TO_DATE in PostgreSQL
556            Expression::ToDate(f) => {
557                let mut args = vec![f.this];
558                if let Some(fmt) = f.format {
559                    args.push(fmt);
560                }
561                Ok(Expression::Function(Box::new(Function::new(
562                    "TO_DATE".to_string(),
563                    args,
564                ))))
565            }
566
567            // TimestampTrunc -> DATE_TRUNC
568            Expression::TimestampTrunc(f) => {
569                // Convert DateTimeField to string expression for DATE_TRUNC
570                let unit_str = format!("{:?}", f.unit).to_lowercase();
571                let args = vec![Expression::string(&unit_str), f.this];
572                Ok(Expression::Function(Box::new(Function::new(
573                    "DATE_TRUNC".to_string(),
574                    args,
575                ))))
576            }
577
578            // TimeFromParts -> MAKE_TIME
579            Expression::TimeFromParts(f) => {
580                let mut args = Vec::new();
581                if let Some(h) = f.hour {
582                    args.push(*h);
583                }
584                if let Some(m) = f.min {
585                    args.push(*m);
586                }
587                if let Some(s) = f.sec {
588                    args.push(*s);
589                }
590                Ok(Expression::Function(Box::new(Function::new(
591                    "MAKE_TIME".to_string(),
592                    args,
593                ))))
594            }
595
596            // TimestampFromParts -> MAKE_TIMESTAMP
597            Expression::MakeTimestamp(f) => {
598                // MakeTimestampFunc has direct Expression fields, not Options
599                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
600                Ok(Expression::Function(Box::new(Function::new(
601                    "MAKE_TIMESTAMP".to_string(),
602                    args,
603                ))))
604            }
605
606            // ============================================
607            // STRING FUNCTIONS
608            // ============================================
609            // StringAgg is native to PostgreSQL - keep as-is
610            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
611
612            // GroupConcat -> STRING_AGG in PostgreSQL
613            Expression::GroupConcat(f) => {
614                let mut args = vec![f.this.clone()];
615                if let Some(sep) = f.separator.clone() {
616                    args.push(sep);
617                } else {
618                    args.push(Expression::string(","));
619                }
620                Ok(Expression::Function(Box::new(Function::new(
621                    "STRING_AGG".to_string(),
622                    args,
623                ))))
624            }
625
626            // StrPosition -> POSITION function
627            Expression::Position(f) => {
628                // PostgreSQL: POSITION(substring IN string)
629                // Keep as Position, generator handles it
630                Ok(Expression::Position(f))
631            }
632
633            // ============================================
634            // AGGREGATE FUNCTIONS
635            // ============================================
636            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
637            Expression::CountIf(f) => {
638                let case_expr = Expression::Case(Box::new(Case {
639                    operand: None,
640                    whens: vec![(f.this.clone(), Expression::number(1))],
641                    else_: Some(Expression::number(0)),
642                    comments: Vec::new(),
643                    inferred_type: None,
644                }));
645                Ok(Expression::Sum(Box::new(AggFunc {
646                    ignore_nulls: None,
647                    having_max: None,
648                    this: case_expr,
649                    distinct: f.distinct,
650                    filter: f.filter,
651                    order_by: Vec::new(),
652                    name: None,
653                    limit: None,
654                    inferred_type: None,
655                })))
656            }
657
658            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
659            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
660
661            // Variance -> VAR_SAMP in PostgreSQL
662            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
663                "VAR_SAMP".to_string(),
664                vec![f.this],
665            )))),
666
667            // VarPop -> VAR_POP in PostgreSQL
668            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
669                "VAR_POP".to_string(),
670                vec![f.this],
671            )))),
672
673            // ============================================
674            // JSON FUNCTIONS
675            // ============================================
676            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
677            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
678            Expression::JsonExtract(mut f) => {
679                // Only use arrow syntax for simple literal paths (string or non-negative number)
680                // Complex expressions like column references should use function form
681                f.arrow_syntax = Self::is_simple_json_path(&f.path);
682                Ok(Expression::JsonExtract(f))
683            }
684
685            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
686            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
687            // #>> (hash_arrow_syntax) stays as #>>
688            Expression::JsonExtractScalar(mut f) => {
689                if !f.hash_arrow_syntax {
690                    // Only use arrow syntax for simple literal paths (string or non-negative number)
691                    // Complex expressions like Neg(-1) should use function form
692                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
693                }
694                Ok(Expression::JsonExtractScalar(f))
695            }
696
697            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
698
699            // JSONObjectAgg -> JSON_OBJECT_AGG
700            Expression::JsonObjectAgg(f) => {
701                // JsonObjectAggFunc has key and value as Expression, not Option
702                let args = vec![f.key, f.value];
703                Ok(Expression::Function(Box::new(Function::new(
704                    "JSON_OBJECT_AGG".to_string(),
705                    args,
706                ))))
707            }
708
709            // JSONArrayAgg -> JSON_AGG
710            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
711                "JSON_AGG".to_string(),
712                vec![f.this],
713            )))),
714
715            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
716            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
717                String::new(),
718            )))),
719
720            // ============================================
721            // MISC FUNCTIONS
722            // ============================================
723            // IntDiv -> DIV in PostgreSQL
724            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
725                "DIV".to_string(),
726                vec![f.this, f.expression],
727            )))),
728
729            // Unicode -> ASCII in PostgreSQL
730            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
731                "ASCII".to_string(),
732                vec![f.this],
733            )))),
734
735            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
736            Expression::LastDay(f) => {
737                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
738                let truncated = Expression::Function(Box::new(Function::new(
739                    "DATE_TRUNC".to_string(),
740                    vec![Expression::string("month"), f.this.clone()],
741                )));
742                let plus_month = Expression::Add(Box::new(BinaryOp {
743                    left: truncated,
744                    right: Expression::Interval(Box::new(Interval {
745                        this: Some(Expression::string("1")),
746                        unit: Some(IntervalUnitSpec::Simple {
747                            unit: IntervalUnit::Month,
748                            use_plural: false,
749                        }),
750                    })),
751                    left_comments: Vec::new(),
752                    operator_comments: Vec::new(),
753                    trailing_comments: Vec::new(),
754                    inferred_type: None,
755                }));
756                let minus_day = Expression::Sub(Box::new(BinaryOp {
757                    left: plus_month,
758                    right: Expression::Interval(Box::new(Interval {
759                        this: Some(Expression::string("1")),
760                        unit: Some(IntervalUnitSpec::Simple {
761                            unit: IntervalUnit::Day,
762                            use_plural: false,
763                        }),
764                    })),
765                    left_comments: Vec::new(),
766                    operator_comments: Vec::new(),
767                    trailing_comments: Vec::new(),
768                    inferred_type: None,
769                }));
770                Ok(Expression::Cast(Box::new(Cast {
771                    this: minus_day,
772                    to: DataType::Date,
773                    trailing_comments: Vec::new(),
774                    double_colon_syntax: true, // Use PostgreSQL :: syntax
775                    format: None,
776                    default: None,
777                    inferred_type: None,
778                })))
779            }
780
781            // GenerateSeries is native to PostgreSQL
782            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
783
784            // ExplodingGenerateSeries -> GENERATE_SERIES
785            Expression::ExplodingGenerateSeries(f) => {
786                let mut args = vec![f.start, f.stop];
787                if let Some(step) = f.step {
788                    args.push(step); // step is Expression, not Box<Expression>
789                }
790                Ok(Expression::Function(Box::new(Function::new(
791                    "GENERATE_SERIES".to_string(),
792                    args,
793                ))))
794            }
795
796            // ============================================
797            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
798            // ============================================
799            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
800            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
801                name: "CURRENT_TIMESTAMP".to_string(),
802                args: vec![],
803                distinct: false,
804                trailing_comments: vec![],
805                use_bracket_syntax: false,
806                no_parens: true,
807                quoted: false,
808                span: None,
809                inferred_type: None,
810            }))),
811
812            // CurrentUser -> CURRENT_USER (no parens)
813            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
814                "CURRENT_USER".to_string(),
815                vec![],
816            )))),
817
818            // CurrentDate -> CURRENT_DATE (no parens)
819            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
820                name: "CURRENT_DATE".to_string(),
821                args: vec![],
822                distinct: false,
823                trailing_comments: vec![],
824                use_bracket_syntax: false,
825                no_parens: true,
826                quoted: false,
827                span: None,
828                inferred_type: None,
829            }))),
830
831            // ============================================
832            // JOIN TRANSFORMATIONS
833            // ============================================
834            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
835            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
836                Ok(Expression::Join(Box::new(Join {
837                    this: join.this,
838                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
839                    using: join.using,
840                    kind: JoinKind::CrossApply,
841                    use_inner_keyword: false,
842                    use_outer_keyword: false,
843                    deferred_condition: false,
844                    join_hint: None,
845                    match_condition: None,
846                    pivots: join.pivots,
847                    comments: join.comments,
848                    nesting_group: 0,
849                    directed: false,
850                })))
851            }
852
853            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
854            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
855                Ok(Expression::Join(Box::new(Join {
856                    this: join.this,
857                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
858                    using: join.using,
859                    kind: JoinKind::OuterApply,
860                    use_inner_keyword: false,
861                    use_outer_keyword: false,
862                    deferred_condition: false,
863                    join_hint: None,
864                    match_condition: None,
865                    pivots: join.pivots,
866                    comments: join.comments,
867                    nesting_group: 0,
868                    directed: false,
869                })))
870            }
871
872            // ============================================
873            // GENERIC FUNCTION TRANSFORMATIONS
874            // ============================================
875            Expression::Function(f) => self.transform_function(*f),
876
877            // Generic aggregate function transformations
878            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
879
880            // ===== Context-aware JSON arrow wrapping =====
881            // When JSON arrow expressions appear in Binary/In/Not contexts,
882            // they need to be wrapped in parentheses for correct precedence.
883            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
884
885            // Binary operators that need JSON wrapping
886            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
887                left: wrap_if_json_arrow(op.left),
888                right: wrap_if_json_arrow(op.right),
889                ..*op
890            }))),
891            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
892                left: wrap_if_json_arrow(op.left),
893                right: wrap_if_json_arrow(op.right),
894                ..*op
895            }))),
896            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
897                left: wrap_if_json_arrow(op.left),
898                right: wrap_if_json_arrow(op.right),
899                ..*op
900            }))),
901            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
902                left: wrap_if_json_arrow(op.left),
903                right: wrap_if_json_arrow(op.right),
904                ..*op
905            }))),
906            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
907                left: wrap_if_json_arrow(op.left),
908                right: wrap_if_json_arrow(op.right),
909                ..*op
910            }))),
911            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
912                left: wrap_if_json_arrow(op.left),
913                right: wrap_if_json_arrow(op.right),
914                ..*op
915            }))),
916
917            // In expression - wrap the this part if it's JSON arrow
918            Expression::In(mut i) => {
919                i.this = wrap_if_json_arrow(i.this);
920                Ok(Expression::In(i))
921            }
922
923            // Not expression - wrap the this part if it's JSON arrow
924            Expression::Not(mut n) => {
925                n.this = wrap_if_json_arrow(n.this);
926                Ok(Expression::Not(n))
927            }
928
929            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
930            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
931            Expression::Merge(m) => Ok(Expression::Merge(m)),
932
933            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
934            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
935                // Convert path from bracketed format to simple key
936                // e.g., '["fr''uit"]' -> 'fr''uit'
937                let path = match *je.expression {
938                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
939                        let Literal::String(s) = lit.as_ref() else {
940                            unreachable!()
941                        };
942                        // Strip bracketed JSON path format: ["key"] -> key
943                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
944                            s[2..s.len() - 2].to_string()
945                        } else {
946                            s.clone()
947                        };
948                        Expression::Literal(Box::new(Literal::String(cleaned)))
949                    }
950                    other => other,
951                };
952                Ok(Expression::Function(Box::new(Function::new(
953                    "JSON_EXTRACT_PATH".to_string(),
954                    vec![*je.this, path],
955                ))))
956            }
957
958            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
959            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
960                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
961                    this: t.this,
962                    characters: t.characters,
963                    position: t.position,
964                    sql_standard_syntax: true,
965                    position_explicit: t.position_explicit,
966                })))
967            }
968
969            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
970            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
971                let Literal::ByteString(s) = lit.as_ref() else {
972                    unreachable!()
973                };
974                Ok(Expression::Cast(Box::new(Cast {
975                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
976                    to: DataType::VarBinary { length: None },
977                    trailing_comments: Vec::new(),
978                    double_colon_syntax: false,
979                    format: None,
980                    default: None,
981                    inferred_type: None,
982                })))
983            }
984
985            // Pass through everything else
986            _ => Ok(expr),
987        }
988    }
989}
990
991impl PostgresDialect {
992    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
993    /// Simple paths can use arrow syntax (->>) in PostgreSQL
994    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
995    fn is_simple_json_path(path: &Expression) -> bool {
996        match path {
997            // String literals are always simple
998            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
999            // Non-negative integer literals are simple
1000            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1001                let Literal::Number(n) = lit.as_ref() else {
1002                    unreachable!()
1003                };
1004                // Check if it's non-negative
1005                !n.starts_with('-')
1006            }
1007            // JSONPath expressions are simple (they're already parsed paths)
1008            Expression::JSONPath(_) => true,
1009            // Everything else (Neg, function calls, etc.) is complex
1010            _ => false,
1011        }
1012    }
1013
1014    /// Transform data types according to PostgreSQL TYPE_MAPPING
1015    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1016        let transformed = match dt {
1017            // TINYINT -> SMALLINT
1018            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1019
1020            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1021            DataType::Float { .. } => DataType::Custom {
1022                name: "DOUBLE PRECISION".to_string(),
1023            },
1024
1025            // DOUBLE -> DOUBLE PRECISION
1026            DataType::Double { .. } => DataType::Custom {
1027                name: "DOUBLE PRECISION".to_string(),
1028            },
1029
1030            // BINARY -> BYTEA (handled by generator which preserves length)
1031            DataType::Binary { .. } => dt,
1032
1033            // VARBINARY -> BYTEA (handled by generator which preserves length)
1034            DataType::VarBinary { .. } => dt,
1035
1036            // BLOB -> BYTEA
1037            DataType::Blob => DataType::Custom {
1038                name: "BYTEA".to_string(),
1039            },
1040
1041            // Custom type normalizations
1042            DataType::Custom { ref name } => {
1043                let upper = name.to_uppercase();
1044                match upper.as_str() {
1045                    // INT8 -> BIGINT (PostgreSQL alias)
1046                    "INT8" => DataType::BigInt { length: None },
1047                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1048                    "FLOAT8" => DataType::Custom {
1049                        name: "DOUBLE PRECISION".to_string(),
1050                    },
1051                    // FLOAT4 -> REAL (PostgreSQL alias)
1052                    "FLOAT4" => DataType::Custom {
1053                        name: "REAL".to_string(),
1054                    },
1055                    // INT4 -> INTEGER (PostgreSQL alias)
1056                    "INT4" => DataType::Int {
1057                        length: None,
1058                        integer_spelling: false,
1059                    },
1060                    // INT2 -> SMALLINT (PostgreSQL alias)
1061                    "INT2" => DataType::SmallInt { length: None },
1062                    _ => dt,
1063                }
1064            }
1065
1066            // Keep all other types as-is
1067            other => other,
1068        };
1069        Ok(Expression::DataType(transformed))
1070    }
1071
1072    fn transform_function(&self, f: Function) -> Result<Expression> {
1073        let name_upper = f.name.to_uppercase();
1074        match name_upper.as_str() {
1075            // IFNULL -> COALESCE
1076            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1077                original_name: None,
1078                expressions: f.args,
1079                inferred_type: None,
1080            }))),
1081
1082            // NVL -> COALESCE
1083            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1084                original_name: None,
1085                expressions: f.args,
1086                inferred_type: None,
1087            }))),
1088
1089            // ISNULL (SQL Server) -> COALESCE
1090            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1091                original_name: None,
1092                expressions: f.args,
1093                inferred_type: None,
1094            }))),
1095
1096            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1097            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1098                Function::new("STRING_AGG".to_string(), f.args),
1099            ))),
1100
1101            // SUBSTR -> SUBSTRING (standard SQL)
1102            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1103                "SUBSTRING".to_string(),
1104                f.args,
1105            )))),
1106
1107            // RAND -> RANDOM in PostgreSQL
1108            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1109
1110            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1111            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1112                this: f.args.into_iter().next().unwrap(),
1113                decimals: None,
1114                to: None,
1115            }))),
1116
1117            // LEN -> LENGTH in PostgreSQL
1118            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1119                this: f.args.into_iter().next().unwrap(),
1120                original_name: None,
1121                inferred_type: None,
1122            }))),
1123
1124            // CHAR_LENGTH -> LENGTH in PostgreSQL
1125            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1126                this: f.args.into_iter().next().unwrap(),
1127                original_name: None,
1128                inferred_type: None,
1129            }))),
1130
1131            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1132            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1133                Ok(Expression::Length(Box::new(UnaryFunc {
1134                    this: f.args.into_iter().next().unwrap(),
1135                    original_name: None,
1136                    inferred_type: None,
1137                })))
1138            }
1139
1140            // CHARINDEX -> POSITION in PostgreSQL
1141            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1142            "CHARINDEX" if f.args.len() >= 2 => {
1143                let mut args = f.args;
1144                let substring = args.remove(0);
1145                let string = args.remove(0);
1146                Ok(Expression::Position(Box::new(
1147                    crate::expressions::PositionFunc {
1148                        substring,
1149                        string,
1150                        start: args.pop(),
1151                    },
1152                )))
1153            }
1154
1155            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1156            "GETDATE" => Ok(Expression::CurrentTimestamp(
1157                crate::expressions::CurrentTimestamp {
1158                    precision: None,
1159                    sysdate: false,
1160                },
1161            )),
1162
1163            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1164            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1165                crate::expressions::CurrentTimestamp {
1166                    precision: None,
1167                    sysdate: false,
1168                },
1169            )),
1170
1171            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1172            "NOW" => Ok(Expression::CurrentTimestamp(
1173                crate::expressions::CurrentTimestamp {
1174                    precision: None,
1175                    sysdate: false,
1176                },
1177            )),
1178
1179            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1180            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1181                "GEN_RANDOM_UUID".to_string(),
1182                vec![],
1183            )))),
1184
1185            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1186            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1187                "GEN_RANDOM_UUID".to_string(),
1188                vec![],
1189            )))),
1190
1191            // UNNEST is native to PostgreSQL
1192            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1193
1194            // GENERATE_SERIES is native to PostgreSQL
1195            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1196
1197            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1198            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1199                "SHA256".to_string(),
1200                f.args,
1201            )))),
1202
1203            // SHA2 -> SHA256/SHA512 based on length argument
1204            "SHA2" if f.args.len() == 2 => {
1205                // SHA2(data, length) -> SHA256/SHA384/SHA512
1206                let args = f.args;
1207                let data = args[0].clone();
1208                // Default to SHA256 - would need runtime inspection for exact mapping
1209                Ok(Expression::Function(Box::new(Function::new(
1210                    "SHA256".to_string(),
1211                    vec![data],
1212                ))))
1213            }
1214
1215            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1216            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1217
1218            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1219            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1220                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1221            ))),
1222            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1223                Function::new("LEVENSHTEIN".to_string(), f.args),
1224            ))),
1225
1226            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1227            "TRIM" if f.args.len() == 2 => {
1228                let value = f.args[0].clone();
1229                let chars = f.args[1].clone();
1230                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1231                    this: value,
1232                    characters: Some(chars),
1233                    position: crate::expressions::TrimPosition::Both,
1234                    sql_standard_syntax: true,
1235                    position_explicit: false,
1236                })))
1237            }
1238
1239            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1240            "DATEDIFF" if f.args.len() >= 2 => {
1241                let mut args = f.args;
1242                if args.len() == 2 {
1243                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1244                    let first = args.remove(0);
1245                    let second = args.remove(0);
1246                    Ok(Expression::Function(Box::new(Function::new(
1247                        "AGE".to_string(),
1248                        vec![first, second],
1249                    ))))
1250                } else {
1251                    // 3-arg form: DATEDIFF(unit, start, end)
1252                    let unit_expr = args.remove(0);
1253                    let start = args.remove(0);
1254                    let end_expr = args.remove(0);
1255
1256                    // Extract unit name from identifier or column
1257                    let unit_name = match &unit_expr {
1258                        Expression::Identifier(id) => id.name.to_uppercase(),
1259                        Expression::Var(v) => v.this.to_uppercase(),
1260                        Expression::Column(col) if col.table.is_none() => {
1261                            col.name.name.to_uppercase()
1262                        }
1263                        _ => "DAY".to_string(),
1264                    };
1265
1266                    // Helper: CAST(expr AS TIMESTAMP)
1267                    let cast_ts = |e: Expression| -> Expression {
1268                        Expression::Cast(Box::new(Cast {
1269                            this: e,
1270                            to: DataType::Timestamp {
1271                                precision: None,
1272                                timezone: false,
1273                            },
1274                            trailing_comments: Vec::new(),
1275                            double_colon_syntax: false,
1276                            format: None,
1277                            default: None,
1278                            inferred_type: None,
1279                        }))
1280                    };
1281
1282                    // Helper: CAST(expr AS BIGINT)
1283                    let cast_bigint = |e: Expression| -> Expression {
1284                        Expression::Cast(Box::new(Cast {
1285                            this: e,
1286                            to: DataType::BigInt { length: None },
1287                            trailing_comments: Vec::new(),
1288                            double_colon_syntax: false,
1289                            format: None,
1290                            default: None,
1291                            inferred_type: None,
1292                        }))
1293                    };
1294
1295                    let end_ts = cast_ts(end_expr.clone());
1296                    let start_ts = cast_ts(start.clone());
1297
1298                    // Helper: end_ts - start_ts
1299                    let ts_diff = || -> Expression {
1300                        Expression::Sub(Box::new(BinaryOp::new(
1301                            cast_ts(end_expr.clone()),
1302                            cast_ts(start.clone()),
1303                        )))
1304                    };
1305
1306                    // Helper: AGE(end_ts, start_ts)
1307                    let age_call = || -> Expression {
1308                        Expression::Function(Box::new(Function::new(
1309                            "AGE".to_string(),
1310                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1311                        )))
1312                    };
1313
1314                    // Helper: EXTRACT(field FROM expr)
1315                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1316                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1317                    };
1318
1319                    // Helper: number literal
1320                    let num = |n: i64| -> Expression {
1321                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1322                    };
1323
1324                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1325                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1326
1327                    let result = match unit_name.as_str() {
1328                        "MICROSECOND" => {
1329                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1330                            let epoch = extract(epoch_field, ts_diff());
1331                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1332                                epoch,
1333                                num(1000000),
1334                            ))))
1335                        }
1336                        "MILLISECOND" => {
1337                            let epoch = extract(epoch_field, ts_diff());
1338                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1339                        }
1340                        "SECOND" => {
1341                            let epoch = extract(epoch_field, ts_diff());
1342                            cast_bigint(epoch)
1343                        }
1344                        "MINUTE" => {
1345                            let epoch = extract(epoch_field, ts_diff());
1346                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1347                        }
1348                        "HOUR" => {
1349                            let epoch = extract(epoch_field, ts_diff());
1350                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1351                        }
1352                        "DAY" => {
1353                            let epoch = extract(epoch_field, ts_diff());
1354                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1355                        }
1356                        "WEEK" => {
1357                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1358                            let diff_parens = Expression::Paren(Box::new(Paren {
1359                                this: ts_diff(),
1360                                trailing_comments: Vec::new(),
1361                            }));
1362                            let days =
1363                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1364                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1365                        }
1366                        "MONTH" => {
1367                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1368                            let year_part =
1369                                extract(DateTimeField::Custom("year".to_string()), age_call());
1370                            let month_part =
1371                                extract(DateTimeField::Custom("month".to_string()), age_call());
1372                            let year_months =
1373                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1374                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1375                                year_months,
1376                                month_part,
1377                            ))))
1378                        }
1379                        "QUARTER" => {
1380                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1381                            let year_part =
1382                                extract(DateTimeField::Custom("year".to_string()), age_call());
1383                            let month_part =
1384                                extract(DateTimeField::Custom("month".to_string()), age_call());
1385                            let year_quarters =
1386                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1387                            let month_quarters =
1388                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1389                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1390                                year_quarters,
1391                                month_quarters,
1392                            ))))
1393                        }
1394                        "YEAR" => {
1395                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1396                            cast_bigint(extract(
1397                                DateTimeField::Custom("year".to_string()),
1398                                age_call(),
1399                            ))
1400                        }
1401                        _ => {
1402                            // Fallback: simple AGE
1403                            Expression::Function(Box::new(Function::new(
1404                                "AGE".to_string(),
1405                                vec![end_ts, start_ts],
1406                            )))
1407                        }
1408                    };
1409                    Ok(result)
1410                }
1411            }
1412
1413            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1414            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1415                let mut args = f.args;
1416                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1417                let start = args.remove(0);
1418                let end = args.remove(0);
1419                Ok(Expression::Function(Box::new(Function::new(
1420                    "AGE".to_string(),
1421                    vec![end, start],
1422                ))))
1423            }
1424
1425            // FROM_UNIXTIME -> TO_TIMESTAMP
1426            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1427                "TO_TIMESTAMP".to_string(),
1428                f.args,
1429            )))),
1430
1431            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1432            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1433                let arg = f.args.into_iter().next().unwrap();
1434                Ok(Expression::Function(Box::new(Function::new(
1435                    "DATE_PART".to_string(),
1436                    vec![Expression::string("epoch"), arg],
1437                ))))
1438            }
1439
1440            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1441            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1442                Ok(Expression::Function(Box::new(Function::new(
1443                    "DATE_PART".to_string(),
1444                    vec![
1445                        Expression::string("epoch"),
1446                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1447                            precision: None,
1448                            sysdate: false,
1449                        }),
1450                    ],
1451                ))))
1452            }
1453
1454            // DATEADD -> date + interval pattern
1455            "DATEADD" if f.args.len() == 3 => {
1456                // DATEADD(unit, count, date) -> date + interval 'count unit'
1457                // This is a simplified version - full impl would construct proper interval
1458                let mut args = f.args;
1459                let _unit = args.remove(0);
1460                let count = args.remove(0);
1461                let date = args.remove(0);
1462                Ok(Expression::Add(Box::new(BinaryOp {
1463                    left: date,
1464                    right: count,
1465                    left_comments: Vec::new(),
1466                    operator_comments: Vec::new(),
1467                    trailing_comments: Vec::new(),
1468                    inferred_type: None,
1469                })))
1470            }
1471
1472            // INSTR -> POSITION (simplified)
1473            "INSTR" if f.args.len() >= 2 => {
1474                let mut args = f.args;
1475                let string = args.remove(0);
1476                let substring = args.remove(0);
1477                Ok(Expression::Position(Box::new(
1478                    crate::expressions::PositionFunc {
1479                        substring,
1480                        string,
1481                        start: args.pop(),
1482                    },
1483                )))
1484            }
1485
1486            // CONCAT_WS is native to PostgreSQL
1487            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1488
1489            // REGEXP_REPLACE: pass through without adding 'g' flag
1490            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1491            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1492                Ok(Expression::Function(Box::new(f)))
1493            }
1494            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1495            // If occurrence is 0 (global), append 'g' to flags
1496            "REGEXP_REPLACE" if f.args.len() == 6 => {
1497                let is_global = match &f.args[4] {
1498                    Expression::Literal(lit)
1499                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1500                    {
1501                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1502                            unreachable!()
1503                        };
1504                        n == "0"
1505                    }
1506                    _ => false,
1507                };
1508                if is_global {
1509                    let subject = f.args[0].clone();
1510                    let pattern = f.args[1].clone();
1511                    let replacement = f.args[2].clone();
1512                    let position = f.args[3].clone();
1513                    let occurrence = f.args[4].clone();
1514                    let params = &f.args[5];
1515                    let mut flags = if let Expression::Literal(lit) = params {
1516                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1517                            s.clone()
1518                        } else {
1519                            String::new()
1520                        }
1521                    } else {
1522                        String::new()
1523                    };
1524                    if !flags.contains('g') {
1525                        flags.push('g');
1526                    }
1527                    Ok(Expression::Function(Box::new(Function::new(
1528                        "REGEXP_REPLACE".to_string(),
1529                        vec![
1530                            subject,
1531                            pattern,
1532                            replacement,
1533                            position,
1534                            occurrence,
1535                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1536                                flags,
1537                            ))),
1538                        ],
1539                    ))))
1540                } else {
1541                    Ok(Expression::Function(Box::new(f)))
1542                }
1543            }
1544            // Default: pass through
1545            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1546
1547            // Pass through everything else
1548            _ => Ok(Expression::Function(Box::new(f))),
1549        }
1550    }
1551
1552    fn transform_aggregate_function(
1553        &self,
1554        f: Box<crate::expressions::AggregateFunction>,
1555    ) -> Result<Expression> {
1556        let name_upper = f.name.to_uppercase();
1557        match name_upper.as_str() {
1558            // COUNT_IF -> SUM(CASE WHEN...)
1559            "COUNT_IF" if !f.args.is_empty() => {
1560                let condition = f.args.into_iter().next().unwrap();
1561                let case_expr = Expression::Case(Box::new(Case {
1562                    operand: None,
1563                    whens: vec![(condition, Expression::number(1))],
1564                    else_: Some(Expression::number(0)),
1565                    comments: Vec::new(),
1566                    inferred_type: None,
1567                }));
1568                Ok(Expression::Sum(Box::new(AggFunc {
1569                    ignore_nulls: None,
1570                    having_max: None,
1571                    this: case_expr,
1572                    distinct: f.distinct,
1573                    filter: f.filter,
1574                    order_by: Vec::new(),
1575                    name: None,
1576                    limit: None,
1577                    inferred_type: None,
1578                })))
1579            }
1580
1581            // GROUP_CONCAT -> STRING_AGG
1582            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1583                Function::new("STRING_AGG".to_string(), f.args),
1584            ))),
1585
1586            // STDEV -> STDDEV in PostgreSQL
1587            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1588                ignore_nulls: None,
1589                having_max: None,
1590                this: f.args.into_iter().next().unwrap(),
1591                distinct: f.distinct,
1592                filter: f.filter,
1593                order_by: Vec::new(),
1594                name: None,
1595                limit: None,
1596                inferred_type: None,
1597            }))),
1598
1599            // STDEVP -> STDDEV_POP in PostgreSQL
1600            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1601                ignore_nulls: None,
1602                having_max: None,
1603                this: f.args.into_iter().next().unwrap(),
1604                distinct: f.distinct,
1605                filter: f.filter,
1606                order_by: Vec::new(),
1607                name: None,
1608                limit: None,
1609                inferred_type: None,
1610            }))),
1611
1612            // VAR -> VAR_SAMP in PostgreSQL
1613            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1614                ignore_nulls: None,
1615                having_max: None,
1616                this: f.args.into_iter().next().unwrap(),
1617                distinct: f.distinct,
1618                filter: f.filter,
1619                order_by: Vec::new(),
1620                name: None,
1621                limit: None,
1622                inferred_type: None,
1623            }))),
1624
1625            // VARP -> VAR_POP in PostgreSQL
1626            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1627                ignore_nulls: None,
1628                having_max: None,
1629                this: f.args.into_iter().next().unwrap(),
1630                distinct: f.distinct,
1631                filter: f.filter,
1632                order_by: Vec::new(),
1633                name: None,
1634                limit: None,
1635                inferred_type: None,
1636            }))),
1637
1638            // BIT_AND is native to PostgreSQL
1639            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1640
1641            // BIT_OR is native to PostgreSQL
1642            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1643
1644            // BIT_XOR is native to PostgreSQL
1645            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1646
1647            // BOOL_AND is native to PostgreSQL
1648            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1649
1650            // BOOL_OR is native to PostgreSQL
1651            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1652
1653            // VARIANCE -> VAR_SAMP in PostgreSQL
1654            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1655                ignore_nulls: None,
1656                having_max: None,
1657                this: f.args.into_iter().next().unwrap(),
1658                distinct: f.distinct,
1659                filter: f.filter,
1660                order_by: Vec::new(),
1661                name: None,
1662                limit: None,
1663                inferred_type: None,
1664            }))),
1665
1666            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1667            "LOGICAL_OR" if !f.args.is_empty() => {
1668                let mut new_agg = f.clone();
1669                new_agg.name = "BOOL_OR".to_string();
1670                Ok(Expression::AggregateFunction(new_agg))
1671            }
1672
1673            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1674            "LOGICAL_AND" if !f.args.is_empty() => {
1675                let mut new_agg = f.clone();
1676                new_agg.name = "BOOL_AND".to_string();
1677                Ok(Expression::AggregateFunction(new_agg))
1678            }
1679
1680            // Pass through everything else
1681            _ => Ok(Expression::AggregateFunction(f)),
1682        }
1683    }
1684}
1685
1686#[cfg(test)]
1687mod tests {
1688    use super::*;
1689    use crate::dialects::Dialect;
1690
1691    fn transpile_to_postgres(sql: &str) -> String {
1692        let dialect = Dialect::get(DialectType::Generic);
1693        let result = dialect
1694            .transpile_to(sql, DialectType::PostgreSQL)
1695            .expect("Transpile failed");
1696        result[0].clone()
1697    }
1698
1699    #[test]
1700    fn test_ifnull_to_coalesce() {
1701        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1702        assert!(
1703            result.contains("COALESCE"),
1704            "Expected COALESCE, got: {}",
1705            result
1706        );
1707    }
1708
1709    #[test]
1710    fn test_nvl_to_coalesce() {
1711        let result = transpile_to_postgres("SELECT NVL(a, b)");
1712        assert!(
1713            result.contains("COALESCE"),
1714            "Expected COALESCE, got: {}",
1715            result
1716        );
1717    }
1718
1719    #[test]
1720    fn test_rand_to_random() {
1721        let result = transpile_to_postgres("SELECT RAND()");
1722        assert!(
1723            result.contains("RANDOM"),
1724            "Expected RANDOM, got: {}",
1725            result
1726        );
1727    }
1728
1729    #[test]
1730    fn test_basic_select() {
1731        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1732        assert!(result.contains("SELECT"));
1733        assert!(result.contains("FROM users"));
1734    }
1735
1736    #[test]
1737    fn test_len_to_length() {
1738        let result = transpile_to_postgres("SELECT LEN(name)");
1739        assert!(
1740            result.contains("LENGTH"),
1741            "Expected LENGTH, got: {}",
1742            result
1743        );
1744    }
1745
1746    #[test]
1747    fn test_getdate_to_current_timestamp() {
1748        let result = transpile_to_postgres("SELECT GETDATE()");
1749        assert!(
1750            result.contains("CURRENT_TIMESTAMP"),
1751            "Expected CURRENT_TIMESTAMP, got: {}",
1752            result
1753        );
1754    }
1755
1756    #[test]
1757    fn test_substr_to_substring() {
1758        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1759        assert!(
1760            result.contains("SUBSTRING"),
1761            "Expected SUBSTRING, got: {}",
1762            result
1763        );
1764    }
1765
1766    #[test]
1767    fn test_group_concat_to_string_agg() {
1768        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1769        assert!(
1770            result.contains("STRING_AGG"),
1771            "Expected STRING_AGG, got: {}",
1772            result
1773        );
1774    }
1775
1776    #[test]
1777    fn test_double_quote_identifiers() {
1778        // PostgreSQL uses double quotes for identifiers
1779        let dialect = PostgresDialect;
1780        let config = dialect.generator_config();
1781        assert_eq!(config.identifier_quote, '"');
1782    }
1783
1784    #[test]
1785    fn test_char_length_to_length() {
1786        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1787        assert!(
1788            result.contains("LENGTH"),
1789            "Expected LENGTH, got: {}",
1790            result
1791        );
1792    }
1793
1794    #[test]
1795    fn test_character_length_to_length() {
1796        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1797        assert!(
1798            result.contains("LENGTH"),
1799            "Expected LENGTH, got: {}",
1800            result
1801        );
1802    }
1803
1804    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1805    fn identity_postgres(sql: &str) -> String {
1806        let dialect = Dialect::get(DialectType::PostgreSQL);
1807        let exprs = dialect.parse(sql).expect("Parse failed");
1808        let transformed = dialect
1809            .transform(exprs[0].clone())
1810            .expect("Transform failed");
1811        dialect.generate(&transformed).expect("Generate failed")
1812    }
1813
1814    #[test]
1815    fn test_json_extract_with_column_path() {
1816        // When the path is a column reference (not a literal), should use function form
1817        let result = identity_postgres("json_data.data -> field_ids.field_id");
1818        assert!(
1819            result.contains("JSON_EXTRACT_PATH"),
1820            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1821            result
1822        );
1823    }
1824
1825    #[test]
1826    fn test_json_extract_scalar_with_negative_index() {
1827        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1828        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1829        assert!(
1830            result.contains("JSON_EXTRACT_PATH_TEXT"),
1831            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1832            result
1833        );
1834        // The first -> should still be arrow syntax since 'duration' is a string literal
1835        assert!(
1836            result.contains("->"),
1837            "Expected -> for string literal path, got: {}",
1838            result
1839        );
1840    }
1841
1842    #[test]
1843    fn test_json_extract_with_string_literal() {
1844        // When the path is a string literal, should keep arrow syntax
1845        let result = identity_postgres("data -> 'key'");
1846        assert!(
1847            result.contains("->"),
1848            "Expected -> for string literal path, got: {}",
1849            result
1850        );
1851        assert!(
1852            !result.contains("JSON_EXTRACT_PATH"),
1853            "Should NOT use function form for string literal, got: {}",
1854            result
1855        );
1856    }
1857}