Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145                inferred_type: None,
146            }))),
147
148            // NVL -> COALESCE in PostgreSQL
149            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
150                original_name: None,
151                expressions: vec![f.this, f.expression],
152                inferred_type: None,
153            }))),
154
155            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
156            // so it outputs as COALESCE instead of the original function name
157            Expression::Coalesce(mut f) => {
158                f.original_name = None;
159                Ok(Expression::Coalesce(f))
160            }
161
162            // ============================================
163            // CAST OPERATIONS
164            // ============================================
165            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
166            Expression::TryCast(c) => Ok(Expression::Cast(c)),
167
168            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
169            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
170
171            // ============================================
172            // RANDOM
173            // ============================================
174            // RAND -> RANDOM in PostgreSQL
175            Expression::Rand(r) => {
176                // PostgreSQL's RANDOM() doesn't take a seed argument
177                let _ = r.seed; // Ignore seed
178                Ok(Expression::Random(crate::expressions::Random))
179            }
180
181            // ============================================
182            // UUID
183            // ============================================
184            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
185            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
186                "GEN_RANDOM_UUID".to_string(),
187                vec![],
188            )))),
189
190            // ============================================
191            // ARRAY OPERATIONS
192            // ============================================
193            // EXPLODE -> UNNEST in PostgreSQL
194            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
195                crate::expressions::UnnestFunc {
196                    this: f.this,
197                    expressions: Vec::new(),
198                    with_ordinality: false,
199                    alias: None,
200                    offset_alias: None,
201                },
202            ))),
203
204            // ExplodeOuter -> UNNEST in PostgreSQL
205            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
206                crate::expressions::UnnestFunc {
207                    this: f.this,
208                    expressions: Vec::new(),
209                    with_ordinality: false,
210                    alias: None,
211                    offset_alias: None,
212                },
213            ))),
214
215            // ArrayConcat -> ARRAY_CAT in PostgreSQL
216            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
217                "ARRAY_CAT".to_string(),
218                f.expressions,
219            )))),
220
221            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
222            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
223                "ARRAY_PREPEND".to_string(),
224                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
225            )))),
226
227            // ============================================
228            // BITWISE OPERATIONS
229            // ============================================
230            // BitwiseXor -> # operator in PostgreSQL
231            Expression::BitwiseXor(f) => {
232                // Use a special marker that generator will recognize
233                Ok(Expression::Function(Box::new(Function::new(
234                    "__PG_BITWISE_XOR__".to_string(),
235                    vec![f.left, f.right],
236                ))))
237            }
238
239            // BitwiseAndAgg -> BIT_AND
240            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
241                "BIT_AND".to_string(),
242                vec![f.this],
243            )))),
244
245            // BitwiseOrAgg -> BIT_OR
246            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
247                "BIT_OR".to_string(),
248                vec![f.this],
249            )))),
250
251            // BitwiseXorAgg -> BIT_XOR
252            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
253                "BIT_XOR".to_string(),
254                vec![f.this],
255            )))),
256
257            // ============================================
258            // BOOLEAN AGGREGATES
259            // ============================================
260            // LogicalAnd -> BOOL_AND
261            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
262                "BOOL_AND".to_string(),
263                vec![f.this],
264            )))),
265
266            // LogicalOr -> BOOL_OR
267            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
268                "BOOL_OR".to_string(),
269                vec![f.this],
270            )))),
271
272            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
273            Expression::Xor(f) => {
274                if let (Some(a), Some(b)) = (f.this, f.expression) {
275                    Ok(Expression::Neq(Box::new(BinaryOp {
276                        left: *a,
277                        right: *b,
278                        left_comments: Vec::new(),
279                        operator_comments: Vec::new(),
280                        trailing_comments: Vec::new(),
281                        inferred_type: None,
282                    })))
283                } else {
284                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
285                }
286            }
287
288            // ============================================
289            // ARRAY OPERATORS
290            // ============================================
291            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
292            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
293            Expression::ArrayContainedBy(op) => {
294                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
295                    left: op.right,
296                    right: op.left,
297                    left_comments: Vec::new(),
298                    operator_comments: Vec::new(),
299                    trailing_comments: Vec::new(),
300                    inferred_type: None,
301                })))
302            }
303
304            // ============================================
305            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
306            // ============================================
307            // RegexpLike -> keep as-is, generator handles ~ operator output
308            Expression::RegexpLike(f) => {
309                // Generator will output as: expr ~ pattern
310                Ok(Expression::RegexpLike(f))
311            }
312
313            // ============================================
314            // DATE/TIME FUNCTIONS
315            // ============================================
316            // DateAdd -> date + INTERVAL in PostgreSQL
317            Expression::DateAdd(f) => {
318                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
319                let right_expr = if is_literal {
320                    // Literal value: INTERVAL 'value' unit
321                    Expression::Interval(Box::new(Interval {
322                        this: Some(f.interval),
323                        unit: Some(IntervalUnitSpec::Simple {
324                            unit: f.unit,
325                            use_plural: false,
326                        }),
327                    }))
328                } else {
329                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
330                    let unit_str = match f.unit {
331                        IntervalUnit::Year => "YEAR",
332                        IntervalUnit::Quarter => "QUARTER",
333                        IntervalUnit::Month => "MONTH",
334                        IntervalUnit::Week => "WEEK",
335                        IntervalUnit::Day => "DAY",
336                        IntervalUnit::Hour => "HOUR",
337                        IntervalUnit::Minute => "MINUTE",
338                        IntervalUnit::Second => "SECOND",
339                        IntervalUnit::Millisecond => "MILLISECOND",
340                        IntervalUnit::Microsecond => "MICROSECOND",
341                        IntervalUnit::Nanosecond => "NANOSECOND",
342                    };
343                    let interval_one = Expression::Interval(Box::new(Interval {
344                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
345                            "1 {unit_str}"
346                        ))))),
347                        unit: None,
348                    }));
349                    Expression::Mul(Box::new(BinaryOp {
350                        left: interval_one,
351                        right: f.interval,
352                        left_comments: Vec::new(),
353                        operator_comments: Vec::new(),
354                        trailing_comments: Vec::new(),
355                        inferred_type: None,
356                    }))
357                };
358                Ok(Expression::Add(Box::new(BinaryOp {
359                    left: f.this,
360                    right: right_expr,
361                    left_comments: Vec::new(),
362                    operator_comments: Vec::new(),
363                    trailing_comments: Vec::new(),
364                    inferred_type: None,
365                })))
366            }
367
368            // DateSub -> date - INTERVAL in PostgreSQL
369            Expression::DateSub(f) => {
370                let interval_expr = Expression::Interval(Box::new(Interval {
371                    this: Some(f.interval),
372                    unit: Some(IntervalUnitSpec::Simple {
373                        unit: f.unit,
374                        use_plural: false,
375                    }),
376                }));
377                Ok(Expression::Sub(Box::new(BinaryOp {
378                    left: f.this,
379                    right: interval_expr,
380                    left_comments: Vec::new(),
381                    operator_comments: Vec::new(),
382                    trailing_comments: Vec::new(),
383                    inferred_type: None,
384                })))
385            }
386
387            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
388            Expression::DateDiff(f) => {
389                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
390                // matching the 3-arg string-based DATEDIFF handler below
391                let unit = f.unit.unwrap_or(IntervalUnit::Day);
392
393                // Helper: CAST(expr AS TIMESTAMP)
394                let cast_ts = |e: Expression| -> Expression {
395                    Expression::Cast(Box::new(Cast {
396                        this: e,
397                        to: DataType::Timestamp {
398                            precision: None,
399                            timezone: false,
400                        },
401                        trailing_comments: Vec::new(),
402                        double_colon_syntax: false,
403                        format: None,
404                        default: None,
405                        inferred_type: None,
406                    }))
407                };
408
409                // Helper: CAST(expr AS BIGINT)
410                let cast_bigint = |e: Expression| -> Expression {
411                    Expression::Cast(Box::new(Cast {
412                        this: e,
413                        to: DataType::BigInt { length: None },
414                        trailing_comments: Vec::new(),
415                        double_colon_syntax: false,
416                        format: None,
417                        default: None,
418                        inferred_type: None,
419                    }))
420                };
421
422                // Clone end/start for reuse
423                let end_expr = f.this;
424                let start = f.expression;
425
426                // Helper: end_ts - start_ts
427                let ts_diff = || -> Expression {
428                    Expression::Sub(Box::new(BinaryOp::new(
429                        cast_ts(end_expr.clone()),
430                        cast_ts(start.clone()),
431                    )))
432                };
433
434                // Helper: AGE(end_ts, start_ts)
435                let age_call = || -> Expression {
436                    Expression::Function(Box::new(Function::new(
437                        "AGE".to_string(),
438                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
439                    )))
440                };
441
442                // Helper: EXTRACT(field FROM expr)
443                let extract = |field: DateTimeField, from: Expression| -> Expression {
444                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
445                };
446
447                // Helper: number literal
448                let num = |n: i64| -> Expression {
449                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
450                };
451
452                let epoch_field = DateTimeField::Custom("epoch".to_string());
453
454                let result = match unit {
455                    IntervalUnit::Nanosecond => {
456                        let epoch = extract(epoch_field.clone(), ts_diff());
457                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
458                            epoch,
459                            num(1000000000),
460                        ))))
461                    }
462                    IntervalUnit::Microsecond => {
463                        let epoch = extract(epoch_field, ts_diff());
464                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
465                            epoch,
466                            num(1000000),
467                        ))))
468                    }
469                    IntervalUnit::Millisecond => {
470                        let epoch = extract(epoch_field, ts_diff());
471                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
472                    }
473                    IntervalUnit::Second => {
474                        let epoch = extract(epoch_field, ts_diff());
475                        cast_bigint(epoch)
476                    }
477                    IntervalUnit::Minute => {
478                        let epoch = extract(epoch_field, ts_diff());
479                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
480                    }
481                    IntervalUnit::Hour => {
482                        let epoch = extract(epoch_field, ts_diff());
483                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
484                    }
485                    IntervalUnit::Day => {
486                        let epoch = extract(epoch_field, ts_diff());
487                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
488                    }
489                    IntervalUnit::Week => {
490                        let diff_parens = Expression::Paren(Box::new(Paren {
491                            this: ts_diff(),
492                            trailing_comments: Vec::new(),
493                        }));
494                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
495                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
496                    }
497                    IntervalUnit::Month => {
498                        let year_part =
499                            extract(DateTimeField::Custom("year".to_string()), age_call());
500                        let month_part =
501                            extract(DateTimeField::Custom("month".to_string()), age_call());
502                        let year_months =
503                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
504                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
505                            year_months,
506                            month_part,
507                        ))))
508                    }
509                    IntervalUnit::Quarter => {
510                        let year_part =
511                            extract(DateTimeField::Custom("year".to_string()), age_call());
512                        let month_part =
513                            extract(DateTimeField::Custom("month".to_string()), age_call());
514                        let year_quarters =
515                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
516                        let month_quarters =
517                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
518                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
519                            year_quarters,
520                            month_quarters,
521                        ))))
522                    }
523                    IntervalUnit::Year => cast_bigint(extract(
524                        DateTimeField::Custom("year".to_string()),
525                        age_call(),
526                    )),
527                };
528                Ok(result)
529            }
530
531            // UnixToTime -> TO_TIMESTAMP
532            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
533                "TO_TIMESTAMP".to_string(),
534                vec![*f.this],
535            )))),
536
537            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
538            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
539                "DATE_PART".to_string(),
540                vec![Expression::string("epoch"), f.this],
541            )))),
542
543            // StrToTime -> TO_TIMESTAMP in PostgreSQL
544            Expression::ToTimestamp(f) => {
545                let mut args = vec![f.this];
546                if let Some(fmt) = f.format {
547                    args.push(fmt);
548                }
549                Ok(Expression::Function(Box::new(Function::new(
550                    "TO_TIMESTAMP".to_string(),
551                    args,
552                ))))
553            }
554
555            // StrToDate -> TO_DATE in PostgreSQL
556            Expression::ToDate(f) => {
557                let mut args = vec![f.this];
558                if let Some(fmt) = f.format {
559                    args.push(fmt);
560                }
561                Ok(Expression::Function(Box::new(Function::new(
562                    "TO_DATE".to_string(),
563                    args,
564                ))))
565            }
566
567            // TimestampTrunc -> DATE_TRUNC
568            Expression::TimestampTrunc(f) => {
569                // Convert DateTimeField to string expression for DATE_TRUNC
570                let unit_str = format!("{:?}", f.unit).to_lowercase();
571                let args = vec![Expression::string(&unit_str), f.this];
572                Ok(Expression::Function(Box::new(Function::new(
573                    "DATE_TRUNC".to_string(),
574                    args,
575                ))))
576            }
577
578            // TimeFromParts -> MAKE_TIME
579            Expression::TimeFromParts(f) => {
580                let mut args = Vec::new();
581                if let Some(h) = f.hour {
582                    args.push(*h);
583                }
584                if let Some(m) = f.min {
585                    args.push(*m);
586                }
587                if let Some(s) = f.sec {
588                    args.push(*s);
589                }
590                Ok(Expression::Function(Box::new(Function::new(
591                    "MAKE_TIME".to_string(),
592                    args,
593                ))))
594            }
595
596            // TimestampFromParts -> MAKE_TIMESTAMP
597            Expression::MakeTimestamp(f) => {
598                // MakeTimestampFunc has direct Expression fields, not Options
599                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
600                Ok(Expression::Function(Box::new(Function::new(
601                    "MAKE_TIMESTAMP".to_string(),
602                    args,
603                ))))
604            }
605
606            // ============================================
607            // STRING FUNCTIONS
608            // ============================================
609            // StringAgg is native to PostgreSQL - keep as-is
610            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
611
612            // GroupConcat -> STRING_AGG in PostgreSQL
613            Expression::GroupConcat(f) => {
614                let mut args = vec![f.this.clone()];
615                if let Some(sep) = f.separator.clone() {
616                    args.push(sep);
617                } else {
618                    args.push(Expression::string(","));
619                }
620                Ok(Expression::Function(Box::new(Function::new(
621                    "STRING_AGG".to_string(),
622                    args,
623                ))))
624            }
625
626            // StrPosition -> POSITION function
627            Expression::Position(f) => {
628                // PostgreSQL: POSITION(substring IN string)
629                // Keep as Position, generator handles it
630                Ok(Expression::Position(f))
631            }
632
633            // ============================================
634            // AGGREGATE FUNCTIONS
635            // ============================================
636            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
637            Expression::CountIf(f) => {
638                let case_expr = Expression::Case(Box::new(Case {
639                    operand: None,
640                    whens: vec![(f.this.clone(), Expression::number(1))],
641                    else_: Some(Expression::number(0)),
642                    comments: Vec::new(),
643                    inferred_type: None,
644                }));
645                Ok(Expression::Sum(Box::new(AggFunc {
646                    ignore_nulls: None,
647                    having_max: None,
648                    this: case_expr,
649                    distinct: f.distinct,
650                    filter: f.filter,
651                    order_by: Vec::new(),
652                    name: None,
653                    limit: None,
654                    inferred_type: None,
655                })))
656            }
657
658            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
659            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
660
661            // Variance -> VAR_SAMP in PostgreSQL
662            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
663                "VAR_SAMP".to_string(),
664                vec![f.this],
665            )))),
666
667            // VarPop -> VAR_POP in PostgreSQL
668            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
669                "VAR_POP".to_string(),
670                vec![f.this],
671            )))),
672
673            // ============================================
674            // JSON FUNCTIONS
675            // ============================================
676            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
677            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
678            Expression::JsonExtract(mut f) => {
679                // Only use arrow syntax for simple literal paths (string or non-negative number)
680                // Complex expressions like column references should use function form
681                f.arrow_syntax = Self::is_simple_json_path(&f.path);
682                Ok(Expression::JsonExtract(f))
683            }
684
685            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
686            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
687            // #>> (hash_arrow_syntax) stays as #>>
688            Expression::JsonExtractScalar(mut f) => {
689                if !f.hash_arrow_syntax {
690                    // Only use arrow syntax for simple literal paths (string or non-negative number)
691                    // Complex expressions like Neg(-1) should use function form
692                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
693                }
694                Ok(Expression::JsonExtractScalar(f))
695            }
696
697            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
698
699            // JSONObjectAgg -> JSON_OBJECT_AGG
700            Expression::JsonObjectAgg(f) => {
701                // JsonObjectAggFunc has key and value as Expression, not Option
702                let args = vec![f.key, f.value];
703                Ok(Expression::Function(Box::new(Function::new(
704                    "JSON_OBJECT_AGG".to_string(),
705                    args,
706                ))))
707            }
708
709            // JSONArrayAgg -> JSON_AGG
710            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
711                "JSON_AGG".to_string(),
712                vec![f.this],
713            )))),
714
715            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
716            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
717                String::new(),
718            )))),
719
720            // ============================================
721            // MISC FUNCTIONS
722            // ============================================
723            // IntDiv -> DIV in PostgreSQL
724            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
725                "DIV".to_string(),
726                vec![f.this, f.expression],
727            )))),
728
729            // Unicode -> ASCII in PostgreSQL
730            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
731                "ASCII".to_string(),
732                vec![f.this],
733            )))),
734
735            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
736            Expression::LastDay(f) => {
737                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
738                let truncated = Expression::Function(Box::new(Function::new(
739                    "DATE_TRUNC".to_string(),
740                    vec![Expression::string("month"), f.this.clone()],
741                )));
742                let plus_month = Expression::Add(Box::new(BinaryOp {
743                    left: truncated,
744                    right: Expression::Interval(Box::new(Interval {
745                        this: Some(Expression::string("1")),
746                        unit: Some(IntervalUnitSpec::Simple {
747                            unit: IntervalUnit::Month,
748                            use_plural: false,
749                        }),
750                    })),
751                    left_comments: Vec::new(),
752                    operator_comments: Vec::new(),
753                    trailing_comments: Vec::new(),
754                    inferred_type: None,
755                }));
756                let minus_day = Expression::Sub(Box::new(BinaryOp {
757                    left: plus_month,
758                    right: Expression::Interval(Box::new(Interval {
759                        this: Some(Expression::string("1")),
760                        unit: Some(IntervalUnitSpec::Simple {
761                            unit: IntervalUnit::Day,
762                            use_plural: false,
763                        }),
764                    })),
765                    left_comments: Vec::new(),
766                    operator_comments: Vec::new(),
767                    trailing_comments: Vec::new(),
768                    inferred_type: None,
769                }));
770                Ok(Expression::Cast(Box::new(Cast {
771                    this: minus_day,
772                    to: DataType::Date,
773                    trailing_comments: Vec::new(),
774                    double_colon_syntax: true, // Use PostgreSQL :: syntax
775                    format: None,
776                    default: None,
777                    inferred_type: None,
778                })))
779            }
780
781            // GenerateSeries is native to PostgreSQL
782            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
783
784            // ExplodingGenerateSeries -> GENERATE_SERIES
785            Expression::ExplodingGenerateSeries(f) => {
786                let mut args = vec![f.start, f.stop];
787                if let Some(step) = f.step {
788                    args.push(step); // step is Expression, not Box<Expression>
789                }
790                Ok(Expression::Function(Box::new(Function::new(
791                    "GENERATE_SERIES".to_string(),
792                    args,
793                ))))
794            }
795
796            // ============================================
797            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
798            // ============================================
799            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
800            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
801                name: "CURRENT_TIMESTAMP".to_string(),
802                args: vec![],
803                distinct: false,
804                trailing_comments: vec![],
805                use_bracket_syntax: false,
806                no_parens: true,
807                quoted: false,
808                span: None,
809                inferred_type: None,
810            }))),
811
812            // CurrentUser -> CURRENT_USER (no parens)
813            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
814                "CURRENT_USER".to_string(),
815                vec![],
816            )))),
817
818            // CurrentDate -> CURRENT_DATE (no parens)
819            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
820                name: "CURRENT_DATE".to_string(),
821                args: vec![],
822                distinct: false,
823                trailing_comments: vec![],
824                use_bracket_syntax: false,
825                no_parens: true,
826                quoted: false,
827                span: None,
828                inferred_type: None,
829            }))),
830
831            // ============================================
832            // JOIN TRANSFORMATIONS
833            // ============================================
834            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
835            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
836                Ok(Expression::Join(Box::new(Join {
837                    this: join.this,
838                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
839                    using: join.using,
840                    kind: JoinKind::CrossApply,
841                    use_inner_keyword: false,
842                    use_outer_keyword: false,
843                    deferred_condition: false,
844                    join_hint: None,
845                    match_condition: None,
846                    pivots: join.pivots,
847                    comments: join.comments,
848                    nesting_group: 0,
849                    directed: false,
850                })))
851            }
852
853            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
854            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
855                Ok(Expression::Join(Box::new(Join {
856                    this: join.this,
857                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
858                    using: join.using,
859                    kind: JoinKind::OuterApply,
860                    use_inner_keyword: false,
861                    use_outer_keyword: false,
862                    deferred_condition: false,
863                    join_hint: None,
864                    match_condition: None,
865                    pivots: join.pivots,
866                    comments: join.comments,
867                    nesting_group: 0,
868                    directed: false,
869                })))
870            }
871
872            // ============================================
873            // GENERIC FUNCTION TRANSFORMATIONS
874            // ============================================
875            Expression::Function(f) => self.transform_function(*f),
876
877            // Generic aggregate function transformations
878            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
879
880            // ===== Context-aware JSON arrow wrapping =====
881            // When JSON arrow expressions appear in Binary/In/Not contexts,
882            // they need to be wrapped in parentheses for correct precedence.
883            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
884
885            // Binary operators that need JSON wrapping
886            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
887                left: wrap_if_json_arrow(op.left),
888                right: wrap_if_json_arrow(op.right),
889                ..*op
890            }))),
891            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
892                left: wrap_if_json_arrow(op.left),
893                right: wrap_if_json_arrow(op.right),
894                ..*op
895            }))),
896            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
897                left: wrap_if_json_arrow(op.left),
898                right: wrap_if_json_arrow(op.right),
899                ..*op
900            }))),
901            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
902                left: wrap_if_json_arrow(op.left),
903                right: wrap_if_json_arrow(op.right),
904                ..*op
905            }))),
906            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
907                left: wrap_if_json_arrow(op.left),
908                right: wrap_if_json_arrow(op.right),
909                ..*op
910            }))),
911            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
912                left: wrap_if_json_arrow(op.left),
913                right: wrap_if_json_arrow(op.right),
914                ..*op
915            }))),
916
917            // In expression - wrap the this part if it's JSON arrow
918            Expression::In(mut i) => {
919                i.this = wrap_if_json_arrow(i.this);
920                Ok(Expression::In(i))
921            }
922
923            // Not expression - wrap the this part if it's JSON arrow
924            Expression::Not(mut n) => {
925                n.this = wrap_if_json_arrow(n.this);
926                Ok(Expression::Not(n))
927            }
928
929            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
930            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
931            Expression::Merge(m) => Ok(Expression::Merge(m)),
932
933            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
934            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
935                // Convert path from bracketed format to simple key
936                // e.g., '["fr''uit"]' -> 'fr''uit'
937                let path = match *je.expression {
938                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
939                        let Literal::String(s) = lit.as_ref() else {
940                            unreachable!()
941                        };
942                        // Strip bracketed JSON path format: ["key"] -> key
943                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
944                            s[2..s.len() - 2].to_string()
945                        } else {
946                            s.clone()
947                        };
948                        Expression::Literal(Box::new(Literal::String(cleaned)))
949                    }
950                    other => other,
951                };
952                Ok(Expression::Function(Box::new(Function::new(
953                    "JSON_EXTRACT_PATH".to_string(),
954                    vec![*je.this, path],
955                ))))
956            }
957
958            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
959            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
960                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
961                    this: t.this,
962                    characters: t.characters,
963                    position: t.position,
964                    sql_standard_syntax: true,
965                    position_explicit: t.position_explicit,
966                })))
967            }
968
969            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
970            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
971                let Literal::ByteString(s) = lit.as_ref() else {
972                    unreachable!()
973                };
974                Ok(Expression::Cast(Box::new(Cast {
975                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
976                    to: DataType::VarBinary { length: None },
977                    trailing_comments: Vec::new(),
978                    double_colon_syntax: false,
979                    format: None,
980                    default: None,
981                    inferred_type: None,
982                })))
983            }
984
985            // Pass through everything else
986            _ => Ok(expr),
987        }
988    }
989}
990
991impl PostgresDialect {
992    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
993    /// Simple paths can use arrow syntax (->>) in PostgreSQL
994    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
995    fn is_simple_json_path(path: &Expression) -> bool {
996        match path {
997            // String literals are always simple
998            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
999            // Non-negative integer literals are simple
1000            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1001                let Literal::Number(n) = lit.as_ref() else {
1002                    unreachable!()
1003                };
1004                // Check if it's non-negative
1005                !n.starts_with('-')
1006            }
1007            // JSONPath expressions are simple (they're already parsed paths)
1008            Expression::JSONPath(_) => true,
1009            // Everything else (Neg, function calls, etc.) is complex
1010            _ => false,
1011        }
1012    }
1013
1014    /// Transform data types according to PostgreSQL TYPE_MAPPING
1015    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1016        let transformed = match dt {
1017            // TINYINT -> SMALLINT
1018            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1019
1020            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1021            DataType::Float { .. } => DataType::Custom {
1022                name: "DOUBLE PRECISION".to_string(),
1023            },
1024
1025            // DOUBLE -> DOUBLE PRECISION
1026            DataType::Double { .. } => DataType::Custom {
1027                name: "DOUBLE PRECISION".to_string(),
1028            },
1029
1030            // BINARY -> BYTEA (handled by generator which preserves length)
1031            DataType::Binary { .. } => dt,
1032
1033            // VARBINARY -> BYTEA (handled by generator which preserves length)
1034            DataType::VarBinary { .. } => dt,
1035
1036            // BLOB -> BYTEA
1037            DataType::Blob => DataType::Custom {
1038                name: "BYTEA".to_string(),
1039            },
1040
1041            // Custom type normalizations
1042            DataType::Custom { ref name } => {
1043                let upper = name.to_uppercase();
1044                match upper.as_str() {
1045                    // INT8 -> BIGINT (PostgreSQL alias)
1046                    "INT8" => DataType::BigInt { length: None },
1047                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1048                    "FLOAT8" => DataType::Custom {
1049                        name: "DOUBLE PRECISION".to_string(),
1050                    },
1051                    // FLOAT4 -> REAL (PostgreSQL alias)
1052                    "FLOAT4" => DataType::Custom {
1053                        name: "REAL".to_string(),
1054                    },
1055                    // INT4 -> INTEGER (PostgreSQL alias)
1056                    "INT4" => DataType::Int {
1057                        length: None,
1058                        integer_spelling: false,
1059                    },
1060                    // INT2 -> SMALLINT (PostgreSQL alias)
1061                    "INT2" => DataType::SmallInt { length: None },
1062                    _ => dt,
1063                }
1064            }
1065
1066            // Keep all other types as-is
1067            other => other,
1068        };
1069        Ok(Expression::DataType(transformed))
1070    }
1071
1072    fn transform_function(&self, f: Function) -> Result<Expression> {
1073        let name_upper = f.name.to_uppercase();
1074        match name_upper.as_str() {
1075            // IFNULL -> COALESCE
1076            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1077                original_name: None,
1078                expressions: f.args,
1079                inferred_type: None,
1080            }))),
1081
1082            // NVL -> COALESCE
1083            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1084                original_name: None,
1085                expressions: f.args,
1086                inferred_type: None,
1087            }))),
1088
1089            // ISNULL (SQL Server) -> COALESCE
1090            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1091                original_name: None,
1092                expressions: f.args,
1093                inferred_type: None,
1094            }))),
1095
1096            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1097            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1098                Function::new("STRING_AGG".to_string(), f.args),
1099            ))),
1100
1101            // SUBSTR -> SUBSTRING (standard SQL)
1102            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1103                "SUBSTRING".to_string(),
1104                f.args,
1105            )))),
1106
1107            // RAND -> RANDOM in PostgreSQL
1108            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1109
1110            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1111            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1112                this: f.args.into_iter().next().unwrap(),
1113                decimals: None,
1114                to: None,
1115            }))),
1116
1117            // LEN -> LENGTH in PostgreSQL
1118            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1119                this: f.args.into_iter().next().unwrap(),
1120                original_name: None,
1121                inferred_type: None,
1122            }))),
1123
1124            // CHAR_LENGTH -> LENGTH in PostgreSQL
1125            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1126                this: f.args.into_iter().next().unwrap(),
1127                original_name: None,
1128                inferred_type: None,
1129            }))),
1130
1131            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1132            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1133                Ok(Expression::Length(Box::new(UnaryFunc {
1134                    this: f.args.into_iter().next().unwrap(),
1135                    original_name: None,
1136                    inferred_type: None,
1137                })))
1138            }
1139
1140            // CHARINDEX -> POSITION in PostgreSQL
1141            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1142            "CHARINDEX" if f.args.len() >= 2 => {
1143                let mut args = f.args;
1144                let substring = args.remove(0);
1145                let string = args.remove(0);
1146                Ok(Expression::Position(Box::new(
1147                    crate::expressions::PositionFunc {
1148                        substring,
1149                        string,
1150                        start: args.pop(),
1151                    },
1152                )))
1153            }
1154
1155            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1156            "GETDATE" => Ok(Expression::CurrentTimestamp(
1157                crate::expressions::CurrentTimestamp {
1158                    precision: None,
1159                    sysdate: false,
1160                },
1161            )),
1162
1163            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1164            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1165                crate::expressions::CurrentTimestamp {
1166                    precision: None,
1167                    sysdate: false,
1168                },
1169            )),
1170
1171            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1172            "NOW" => Ok(Expression::CurrentTimestamp(
1173                crate::expressions::CurrentTimestamp {
1174                    precision: None,
1175                    sysdate: false,
1176                },
1177            )),
1178
1179            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1180            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1181                "GEN_RANDOM_UUID".to_string(),
1182                vec![],
1183            )))),
1184
1185            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1186            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1187                "GEN_RANDOM_UUID".to_string(),
1188                vec![],
1189            )))),
1190
1191            // UNNEST is native to PostgreSQL
1192            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1193
1194            // GENERATE_SERIES is native to PostgreSQL
1195            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1196
1197            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1198            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1199                "SHA256".to_string(),
1200                f.args,
1201            )))),
1202
1203            // SHA2 -> SHA256/SHA512 based on length argument
1204            "SHA2" if f.args.len() == 2 => {
1205                // SHA2(data, length) -> SHA256/SHA384/SHA512
1206                let args = f.args;
1207                let data = args[0].clone();
1208                // Default to SHA256 - would need runtime inspection for exact mapping
1209                Ok(Expression::Function(Box::new(Function::new(
1210                    "SHA256".to_string(),
1211                    vec![data],
1212                ))))
1213            }
1214
1215            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1216            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1217
1218            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1219            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1220                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1221            ))),
1222            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1223                Function::new("LEVENSHTEIN".to_string(), f.args),
1224            ))),
1225
1226            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1227            "TRIM" if f.args.len() == 2 => {
1228                let value = f.args[0].clone();
1229                let chars = f.args[1].clone();
1230                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1231                    this: value,
1232                    characters: Some(chars),
1233                    position: crate::expressions::TrimPosition::Both,
1234                    sql_standard_syntax: true,
1235                    position_explicit: false,
1236                })))
1237            }
1238
1239            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1240            "DATEDIFF" if f.args.len() >= 2 => {
1241                let mut args = f.args;
1242                if args.len() == 2 {
1243                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1244                    let first = args.remove(0);
1245                    let second = args.remove(0);
1246                    Ok(Expression::Function(Box::new(Function::new(
1247                        "AGE".to_string(),
1248                        vec![first, second],
1249                    ))))
1250                } else {
1251                    // 3-arg form: DATEDIFF(unit, start, end)
1252                    let unit_expr = args.remove(0);
1253                    let start = args.remove(0);
1254                    let end_expr = args.remove(0);
1255
1256                    // Extract unit name from identifier or column
1257                    let unit_name = match &unit_expr {
1258                        Expression::Identifier(id) => id.name.to_uppercase(),
1259                        Expression::Column(col) if col.table.is_none() => {
1260                            col.name.name.to_uppercase()
1261                        }
1262                        _ => "DAY".to_string(),
1263                    };
1264
1265                    // Helper: CAST(expr AS TIMESTAMP)
1266                    let cast_ts = |e: Expression| -> Expression {
1267                        Expression::Cast(Box::new(Cast {
1268                            this: e,
1269                            to: DataType::Timestamp {
1270                                precision: None,
1271                                timezone: false,
1272                            },
1273                            trailing_comments: Vec::new(),
1274                            double_colon_syntax: false,
1275                            format: None,
1276                            default: None,
1277                            inferred_type: None,
1278                        }))
1279                    };
1280
1281                    // Helper: CAST(expr AS BIGINT)
1282                    let cast_bigint = |e: Expression| -> Expression {
1283                        Expression::Cast(Box::new(Cast {
1284                            this: e,
1285                            to: DataType::BigInt { length: None },
1286                            trailing_comments: Vec::new(),
1287                            double_colon_syntax: false,
1288                            format: None,
1289                            default: None,
1290                            inferred_type: None,
1291                        }))
1292                    };
1293
1294                    let end_ts = cast_ts(end_expr.clone());
1295                    let start_ts = cast_ts(start.clone());
1296
1297                    // Helper: end_ts - start_ts
1298                    let ts_diff = || -> Expression {
1299                        Expression::Sub(Box::new(BinaryOp::new(
1300                            cast_ts(end_expr.clone()),
1301                            cast_ts(start.clone()),
1302                        )))
1303                    };
1304
1305                    // Helper: AGE(end_ts, start_ts)
1306                    let age_call = || -> Expression {
1307                        Expression::Function(Box::new(Function::new(
1308                            "AGE".to_string(),
1309                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1310                        )))
1311                    };
1312
1313                    // Helper: EXTRACT(field FROM expr)
1314                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1315                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1316                    };
1317
1318                    // Helper: number literal
1319                    let num = |n: i64| -> Expression {
1320                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1321                    };
1322
1323                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1324                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1325
1326                    let result = match unit_name.as_str() {
1327                        "MICROSECOND" => {
1328                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1329                            let epoch = extract(epoch_field, ts_diff());
1330                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1331                                epoch,
1332                                num(1000000),
1333                            ))))
1334                        }
1335                        "MILLISECOND" => {
1336                            let epoch = extract(epoch_field, ts_diff());
1337                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1338                        }
1339                        "SECOND" => {
1340                            let epoch = extract(epoch_field, ts_diff());
1341                            cast_bigint(epoch)
1342                        }
1343                        "MINUTE" => {
1344                            let epoch = extract(epoch_field, ts_diff());
1345                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1346                        }
1347                        "HOUR" => {
1348                            let epoch = extract(epoch_field, ts_diff());
1349                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1350                        }
1351                        "DAY" => {
1352                            let epoch = extract(epoch_field, ts_diff());
1353                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1354                        }
1355                        "WEEK" => {
1356                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1357                            let diff_parens = Expression::Paren(Box::new(Paren {
1358                                this: ts_diff(),
1359                                trailing_comments: Vec::new(),
1360                            }));
1361                            let days =
1362                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1363                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1364                        }
1365                        "MONTH" => {
1366                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1367                            let year_part =
1368                                extract(DateTimeField::Custom("year".to_string()), age_call());
1369                            let month_part =
1370                                extract(DateTimeField::Custom("month".to_string()), age_call());
1371                            let year_months =
1372                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1373                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1374                                year_months,
1375                                month_part,
1376                            ))))
1377                        }
1378                        "QUARTER" => {
1379                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1380                            let year_part =
1381                                extract(DateTimeField::Custom("year".to_string()), age_call());
1382                            let month_part =
1383                                extract(DateTimeField::Custom("month".to_string()), age_call());
1384                            let year_quarters =
1385                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1386                            let month_quarters =
1387                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1388                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1389                                year_quarters,
1390                                month_quarters,
1391                            ))))
1392                        }
1393                        "YEAR" => {
1394                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1395                            cast_bigint(extract(
1396                                DateTimeField::Custom("year".to_string()),
1397                                age_call(),
1398                            ))
1399                        }
1400                        _ => {
1401                            // Fallback: simple AGE
1402                            Expression::Function(Box::new(Function::new(
1403                                "AGE".to_string(),
1404                                vec![end_ts, start_ts],
1405                            )))
1406                        }
1407                    };
1408                    Ok(result)
1409                }
1410            }
1411
1412            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1413            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1414                let mut args = f.args;
1415                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1416                let start = args.remove(0);
1417                let end = args.remove(0);
1418                Ok(Expression::Function(Box::new(Function::new(
1419                    "AGE".to_string(),
1420                    vec![end, start],
1421                ))))
1422            }
1423
1424            // FROM_UNIXTIME -> TO_TIMESTAMP
1425            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1426                "TO_TIMESTAMP".to_string(),
1427                f.args,
1428            )))),
1429
1430            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1431            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1432                let arg = f.args.into_iter().next().unwrap();
1433                Ok(Expression::Function(Box::new(Function::new(
1434                    "DATE_PART".to_string(),
1435                    vec![Expression::string("epoch"), arg],
1436                ))))
1437            }
1438
1439            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1440            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1441                Ok(Expression::Function(Box::new(Function::new(
1442                    "DATE_PART".to_string(),
1443                    vec![
1444                        Expression::string("epoch"),
1445                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1446                            precision: None,
1447                            sysdate: false,
1448                        }),
1449                    ],
1450                ))))
1451            }
1452
1453            // DATEADD -> date + interval pattern
1454            "DATEADD" if f.args.len() == 3 => {
1455                // DATEADD(unit, count, date) -> date + interval 'count unit'
1456                // This is a simplified version - full impl would construct proper interval
1457                let mut args = f.args;
1458                let _unit = args.remove(0);
1459                let count = args.remove(0);
1460                let date = args.remove(0);
1461                Ok(Expression::Add(Box::new(BinaryOp {
1462                    left: date,
1463                    right: count,
1464                    left_comments: Vec::new(),
1465                    operator_comments: Vec::new(),
1466                    trailing_comments: Vec::new(),
1467                    inferred_type: None,
1468                })))
1469            }
1470
1471            // INSTR -> POSITION (simplified)
1472            "INSTR" if f.args.len() >= 2 => {
1473                let mut args = f.args;
1474                let string = args.remove(0);
1475                let substring = args.remove(0);
1476                Ok(Expression::Position(Box::new(
1477                    crate::expressions::PositionFunc {
1478                        substring,
1479                        string,
1480                        start: args.pop(),
1481                    },
1482                )))
1483            }
1484
1485            // CONCAT_WS is native to PostgreSQL
1486            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1487
1488            // REGEXP_REPLACE: pass through without adding 'g' flag
1489            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1490            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1491                Ok(Expression::Function(Box::new(f)))
1492            }
1493            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1494            // If occurrence is 0 (global), append 'g' to flags
1495            "REGEXP_REPLACE" if f.args.len() == 6 => {
1496                let is_global = match &f.args[4] {
1497                    Expression::Literal(lit)
1498                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1499                    {
1500                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1501                            unreachable!()
1502                        };
1503                        n == "0"
1504                    }
1505                    _ => false,
1506                };
1507                if is_global {
1508                    let subject = f.args[0].clone();
1509                    let pattern = f.args[1].clone();
1510                    let replacement = f.args[2].clone();
1511                    let position = f.args[3].clone();
1512                    let occurrence = f.args[4].clone();
1513                    let params = &f.args[5];
1514                    let mut flags = if let Expression::Literal(lit) = params {
1515                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1516                            s.clone()
1517                        } else {
1518                            String::new()
1519                        }
1520                    } else {
1521                        String::new()
1522                    };
1523                    if !flags.contains('g') {
1524                        flags.push('g');
1525                    }
1526                    Ok(Expression::Function(Box::new(Function::new(
1527                        "REGEXP_REPLACE".to_string(),
1528                        vec![
1529                            subject,
1530                            pattern,
1531                            replacement,
1532                            position,
1533                            occurrence,
1534                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1535                                flags,
1536                            ))),
1537                        ],
1538                    ))))
1539                } else {
1540                    Ok(Expression::Function(Box::new(f)))
1541                }
1542            }
1543            // Default: pass through
1544            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1545
1546            // Pass through everything else
1547            _ => Ok(Expression::Function(Box::new(f))),
1548        }
1549    }
1550
1551    fn transform_aggregate_function(
1552        &self,
1553        f: Box<crate::expressions::AggregateFunction>,
1554    ) -> Result<Expression> {
1555        let name_upper = f.name.to_uppercase();
1556        match name_upper.as_str() {
1557            // COUNT_IF -> SUM(CASE WHEN...)
1558            "COUNT_IF" if !f.args.is_empty() => {
1559                let condition = f.args.into_iter().next().unwrap();
1560                let case_expr = Expression::Case(Box::new(Case {
1561                    operand: None,
1562                    whens: vec![(condition, Expression::number(1))],
1563                    else_: Some(Expression::number(0)),
1564                    comments: Vec::new(),
1565                    inferred_type: None,
1566                }));
1567                Ok(Expression::Sum(Box::new(AggFunc {
1568                    ignore_nulls: None,
1569                    having_max: None,
1570                    this: case_expr,
1571                    distinct: f.distinct,
1572                    filter: f.filter,
1573                    order_by: Vec::new(),
1574                    name: None,
1575                    limit: None,
1576                    inferred_type: None,
1577                })))
1578            }
1579
1580            // GROUP_CONCAT -> STRING_AGG
1581            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1582                Function::new("STRING_AGG".to_string(), f.args),
1583            ))),
1584
1585            // STDEV -> STDDEV in PostgreSQL
1586            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1587                ignore_nulls: None,
1588                having_max: None,
1589                this: f.args.into_iter().next().unwrap(),
1590                distinct: f.distinct,
1591                filter: f.filter,
1592                order_by: Vec::new(),
1593                name: None,
1594                limit: None,
1595                inferred_type: None,
1596            }))),
1597
1598            // STDEVP -> STDDEV_POP in PostgreSQL
1599            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1600                ignore_nulls: None,
1601                having_max: None,
1602                this: f.args.into_iter().next().unwrap(),
1603                distinct: f.distinct,
1604                filter: f.filter,
1605                order_by: Vec::new(),
1606                name: None,
1607                limit: None,
1608                inferred_type: None,
1609            }))),
1610
1611            // VAR -> VAR_SAMP in PostgreSQL
1612            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1613                ignore_nulls: None,
1614                having_max: None,
1615                this: f.args.into_iter().next().unwrap(),
1616                distinct: f.distinct,
1617                filter: f.filter,
1618                order_by: Vec::new(),
1619                name: None,
1620                limit: None,
1621                inferred_type: None,
1622            }))),
1623
1624            // VARP -> VAR_POP in PostgreSQL
1625            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1626                ignore_nulls: None,
1627                having_max: None,
1628                this: f.args.into_iter().next().unwrap(),
1629                distinct: f.distinct,
1630                filter: f.filter,
1631                order_by: Vec::new(),
1632                name: None,
1633                limit: None,
1634                inferred_type: None,
1635            }))),
1636
1637            // BIT_AND is native to PostgreSQL
1638            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1639
1640            // BIT_OR is native to PostgreSQL
1641            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1642
1643            // BIT_XOR is native to PostgreSQL
1644            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1645
1646            // BOOL_AND is native to PostgreSQL
1647            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1648
1649            // BOOL_OR is native to PostgreSQL
1650            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1651
1652            // VARIANCE -> VAR_SAMP in PostgreSQL
1653            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1654                ignore_nulls: None,
1655                having_max: None,
1656                this: f.args.into_iter().next().unwrap(),
1657                distinct: f.distinct,
1658                filter: f.filter,
1659                order_by: Vec::new(),
1660                name: None,
1661                limit: None,
1662                inferred_type: None,
1663            }))),
1664
1665            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1666            "LOGICAL_OR" if !f.args.is_empty() => {
1667                let mut new_agg = f.clone();
1668                new_agg.name = "BOOL_OR".to_string();
1669                Ok(Expression::AggregateFunction(new_agg))
1670            }
1671
1672            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1673            "LOGICAL_AND" if !f.args.is_empty() => {
1674                let mut new_agg = f.clone();
1675                new_agg.name = "BOOL_AND".to_string();
1676                Ok(Expression::AggregateFunction(new_agg))
1677            }
1678
1679            // Pass through everything else
1680            _ => Ok(Expression::AggregateFunction(f)),
1681        }
1682    }
1683}
1684
1685#[cfg(test)]
1686mod tests {
1687    use super::*;
1688    use crate::dialects::Dialect;
1689
1690    fn transpile_to_postgres(sql: &str) -> String {
1691        let dialect = Dialect::get(DialectType::Generic);
1692        let result = dialect
1693            .transpile_to(sql, DialectType::PostgreSQL)
1694            .expect("Transpile failed");
1695        result[0].clone()
1696    }
1697
1698    #[test]
1699    fn test_ifnull_to_coalesce() {
1700        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1701        assert!(
1702            result.contains("COALESCE"),
1703            "Expected COALESCE, got: {}",
1704            result
1705        );
1706    }
1707
1708    #[test]
1709    fn test_nvl_to_coalesce() {
1710        let result = transpile_to_postgres("SELECT NVL(a, b)");
1711        assert!(
1712            result.contains("COALESCE"),
1713            "Expected COALESCE, got: {}",
1714            result
1715        );
1716    }
1717
1718    #[test]
1719    fn test_rand_to_random() {
1720        let result = transpile_to_postgres("SELECT RAND()");
1721        assert!(
1722            result.contains("RANDOM"),
1723            "Expected RANDOM, got: {}",
1724            result
1725        );
1726    }
1727
1728    #[test]
1729    fn test_basic_select() {
1730        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1731        assert!(result.contains("SELECT"));
1732        assert!(result.contains("FROM users"));
1733    }
1734
1735    #[test]
1736    fn test_len_to_length() {
1737        let result = transpile_to_postgres("SELECT LEN(name)");
1738        assert!(
1739            result.contains("LENGTH"),
1740            "Expected LENGTH, got: {}",
1741            result
1742        );
1743    }
1744
1745    #[test]
1746    fn test_getdate_to_current_timestamp() {
1747        let result = transpile_to_postgres("SELECT GETDATE()");
1748        assert!(
1749            result.contains("CURRENT_TIMESTAMP"),
1750            "Expected CURRENT_TIMESTAMP, got: {}",
1751            result
1752        );
1753    }
1754
1755    #[test]
1756    fn test_substr_to_substring() {
1757        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1758        assert!(
1759            result.contains("SUBSTRING"),
1760            "Expected SUBSTRING, got: {}",
1761            result
1762        );
1763    }
1764
1765    #[test]
1766    fn test_group_concat_to_string_agg() {
1767        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1768        assert!(
1769            result.contains("STRING_AGG"),
1770            "Expected STRING_AGG, got: {}",
1771            result
1772        );
1773    }
1774
1775    #[test]
1776    fn test_double_quote_identifiers() {
1777        // PostgreSQL uses double quotes for identifiers
1778        let dialect = PostgresDialect;
1779        let config = dialect.generator_config();
1780        assert_eq!(config.identifier_quote, '"');
1781    }
1782
1783    #[test]
1784    fn test_char_length_to_length() {
1785        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1786        assert!(
1787            result.contains("LENGTH"),
1788            "Expected LENGTH, got: {}",
1789            result
1790        );
1791    }
1792
1793    #[test]
1794    fn test_character_length_to_length() {
1795        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1796        assert!(
1797            result.contains("LENGTH"),
1798            "Expected LENGTH, got: {}",
1799            result
1800        );
1801    }
1802
1803    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1804    fn identity_postgres(sql: &str) -> String {
1805        let dialect = Dialect::get(DialectType::PostgreSQL);
1806        let exprs = dialect.parse(sql).expect("Parse failed");
1807        let transformed = dialect
1808            .transform(exprs[0].clone())
1809            .expect("Transform failed");
1810        dialect.generate(&transformed).expect("Generate failed")
1811    }
1812
1813    #[test]
1814    fn test_json_extract_with_column_path() {
1815        // When the path is a column reference (not a literal), should use function form
1816        let result = identity_postgres("json_data.data -> field_ids.field_id");
1817        assert!(
1818            result.contains("JSON_EXTRACT_PATH"),
1819            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1820            result
1821        );
1822    }
1823
1824    #[test]
1825    fn test_json_extract_scalar_with_negative_index() {
1826        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1827        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1828        assert!(
1829            result.contains("JSON_EXTRACT_PATH_TEXT"),
1830            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1831            result
1832        );
1833        // The first -> should still be arrow syntax since 'duration' is a string literal
1834        assert!(
1835            result.contains("->"),
1836            "Expected -> for string literal path, got: {}",
1837            result
1838        );
1839    }
1840
1841    #[test]
1842    fn test_json_extract_with_string_literal() {
1843        // When the path is a string literal, should keep arrow syntax
1844        let result = identity_postgres("data -> 'key'");
1845        assert!(
1846            result.contains("->"),
1847            "Expected -> for string literal path, got: {}",
1848            result
1849        );
1850        assert!(
1851            !result.contains("JSON_EXTRACT_PATH"),
1852            "Should NOT use function form for string literal, got: {}",
1853            result
1854        );
1855    }
1856}