Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, AggregateFunction, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType,
22    DateTimeField, Expression, ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec,
23    Join, JoinKind, Literal, Paren, UnaryFunc, VarArgFunc,
24};
25#[cfg(feature = "generate")]
26use crate::generator::GeneratorConfig;
27use crate::tokens::TokenizerConfig;
28
29/// Helper to wrap JSON arrow expressions in parentheses when they appear
30/// in contexts that require it (Binary, In, Not expressions)
31/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
32fn wrap_if_json_arrow(expr: Expression) -> Expression {
33    match &expr {
34        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
35            this: expr,
36            trailing_comments: Vec::new(),
37        })),
38        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
39            this: expr,
40            trailing_comments: Vec::new(),
41        })),
42        _ => expr,
43    }
44}
45
46/// PostgreSQL dialect
47pub struct PostgresDialect;
48
49impl DialectImpl for PostgresDialect {
50    fn dialect_type(&self) -> DialectType {
51        DialectType::PostgreSQL
52    }
53
54    fn tokenizer_config(&self) -> TokenizerConfig {
55        use crate::tokens::TokenType;
56        let mut config = TokenizerConfig::default();
57        // PostgreSQL supports $$ string literals (heredoc strings)
58        config.quotes.insert("$$".to_string(), "$$".to_string());
59        // PostgreSQL uses double quotes for identifiers
60        config.identifiers.insert('"', '"');
61        // Nested comments supported
62        config.nested_comments = true;
63        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
64        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
65        config
66            .keywords
67            .insert("EXEC".to_string(), TokenType::Command);
68        config
69    }
70
71    #[cfg(feature = "generate")]
72
73    fn generator_config(&self) -> GeneratorConfig {
74        use crate::generator::IdentifierQuoteStyle;
75        GeneratorConfig {
76            identifier_quote: '"',
77            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
78            dialect: Some(DialectType::PostgreSQL),
79            // PostgreSQL uses TIMESTAMPTZ shorthand
80            tz_to_with_time_zone: false,
81            // PostgreSQL prefers INTERVAL '1 day' syntax
82            single_string_interval: true,
83            // TABLESAMPLE uses REPEATABLE in PostgreSQL
84            tablesample_seed_keyword: "REPEATABLE",
85            // PostgreSQL doesn't support NVL2
86            nvl2_supported: false,
87            // PostgreSQL uses $ for parameters
88            parameter_token: "$",
89            // PostgreSQL uses % for named placeholders
90            named_placeholder_token: "%",
91            // PostgreSQL supports SELECT INTO
92            supports_select_into: true,
93            // PostgreSQL: USING btree(col) without space before parens
94            index_using_no_space: true,
95            // PostgreSQL supports UNLOGGED tables
96            supports_unlogged_tables: true,
97            // PostgreSQL doesn't support multi-arg DISTINCT
98            multi_arg_distinct: false,
99            // PostgreSQL uses ANY (subquery) with space
100            quantified_no_paren_space: false,
101            // PostgreSQL supports window EXCLUDE clause
102            supports_window_exclude: true,
103            // PostgreSQL normalizes single-bound window frames to BETWEEN form
104            normalize_window_frame_between: true,
105            // PostgreSQL COPY doesn't use INTO keyword
106            copy_has_into_keyword: false,
107            // PostgreSQL ARRAY_SIZE requires dimension argument
108            array_size_dim_required: Some(true),
109            // PostgreSQL supports BETWEEN flags
110            supports_between_flags: true,
111            // PostgreSQL doesn't support hints
112            join_hints: false,
113            table_hints: false,
114            query_hints: false,
115            // PostgreSQL supports locking reads
116            locking_reads_supported: true,
117            // PostgreSQL doesn't rename tables with DB
118            rename_table_with_db: false,
119            // PostgreSQL can implement array any
120            can_implement_array_any: true,
121            // PostgreSQL ARRAY_CONCAT is not var-len
122            array_concat_is_var_len: false,
123            // PostgreSQL doesn't support MEDIAN
124            supports_median: false,
125            // PostgreSQL requires JSON type for extraction
126            json_type_required_for_extraction: true,
127            // PostgreSQL LIKE property inside schema
128            like_property_inside_schema: true,
129            ..Default::default()
130        }
131    }
132
133    #[cfg(feature = "transpile")]
134
135    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
136        match expr {
137            // ============================================
138            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
139            // These are handled specially - transform DataType variants
140            // ============================================
141            Expression::DataType(dt) => self.transform_data_type(dt),
142
143            // ============================================
144            // NULL HANDLING
145            // ============================================
146            // IFNULL -> COALESCE in PostgreSQL
147            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
148                original_name: None,
149                expressions: vec![f.this, f.expression],
150                inferred_type: None,
151            }))),
152
153            // NVL -> COALESCE in PostgreSQL
154            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
155                original_name: None,
156                expressions: vec![f.this, f.expression],
157                inferred_type: None,
158            }))),
159
160            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
161            // so it outputs as COALESCE instead of the original function name
162            Expression::Coalesce(mut f) => {
163                f.original_name = None;
164                Ok(Expression::Coalesce(f))
165            }
166
167            // ============================================
168            // CAST OPERATIONS
169            // ============================================
170            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
171            Expression::TryCast(c) => Ok(Expression::Cast(c)),
172
173            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
174            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
175
176            // ============================================
177            // RANDOM
178            // ============================================
179            // RAND -> RANDOM in PostgreSQL
180            Expression::Rand(r) => {
181                // PostgreSQL's RANDOM() doesn't take a seed argument
182                let _ = r.seed; // Ignore seed
183                Ok(Expression::Random(crate::expressions::Random))
184            }
185
186            // ============================================
187            // UUID
188            // ============================================
189            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
190            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
191                "GEN_RANDOM_UUID".to_string(),
192                vec![],
193            )))),
194
195            // ============================================
196            // ARRAY OPERATIONS
197            // ============================================
198            // EXPLODE -> UNNEST in PostgreSQL
199            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
200                crate::expressions::UnnestFunc {
201                    this: f.this,
202                    expressions: Vec::new(),
203                    with_ordinality: false,
204                    alias: None,
205                    offset_alias: None,
206                },
207            ))),
208
209            // ExplodeOuter -> UNNEST in PostgreSQL
210            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
211                crate::expressions::UnnestFunc {
212                    this: f.this,
213                    expressions: Vec::new(),
214                    with_ordinality: false,
215                    alias: None,
216                    offset_alias: None,
217                },
218            ))),
219
220            // ArrayConcat -> ARRAY_CAT in PostgreSQL
221            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
222                "ARRAY_CAT".to_string(),
223                f.expressions,
224            )))),
225
226            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
227            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
228                "ARRAY_PREPEND".to_string(),
229                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
230            )))),
231
232            // ============================================
233            // BITWISE OPERATIONS
234            // ============================================
235            // BitwiseXor -> # operator in PostgreSQL
236            Expression::BitwiseXor(f) => {
237                // Use a special marker that generator will recognize
238                Ok(Expression::Function(Box::new(Function::new(
239                    "__PG_BITWISE_XOR__".to_string(),
240                    vec![f.left, f.right],
241                ))))
242            }
243
244            // BitwiseAndAgg -> BIT_AND
245            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
246                "BIT_AND".to_string(),
247                vec![f.this],
248            )))),
249
250            // BitwiseOrAgg -> BIT_OR
251            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
252                "BIT_OR".to_string(),
253                vec![f.this],
254            )))),
255
256            // BitwiseXorAgg -> BIT_XOR
257            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
258                "BIT_XOR".to_string(),
259                vec![f.this],
260            )))),
261
262            // ============================================
263            // BOOLEAN AGGREGATES
264            // ============================================
265            // LogicalAnd -> BOOL_AND
266            Expression::LogicalAnd(f) => {
267                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
268                    name: "BOOL_AND".to_string(),
269                    args: vec![f.this],
270                    distinct: f.distinct,
271                    filter: f.filter,
272                    order_by: f.order_by,
273                    limit: f.limit,
274                    ignore_nulls: f.ignore_nulls,
275                    inferred_type: f.inferred_type,
276                })))
277            }
278
279            // LogicalOr -> BOOL_OR
280            Expression::LogicalOr(f) => {
281                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
282                    name: "BOOL_OR".to_string(),
283                    args: vec![f.this],
284                    distinct: f.distinct,
285                    filter: f.filter,
286                    order_by: f.order_by,
287                    limit: f.limit,
288                    ignore_nulls: f.ignore_nulls,
289                    inferred_type: f.inferred_type,
290                })))
291            }
292
293            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
294            Expression::Xor(f) => {
295                if let (Some(a), Some(b)) = (f.this, f.expression) {
296                    Ok(Expression::Neq(Box::new(BinaryOp {
297                        left: *a,
298                        right: *b,
299                        left_comments: Vec::new(),
300                        operator_comments: Vec::new(),
301                        trailing_comments: Vec::new(),
302                        inferred_type: None,
303                    })))
304                } else {
305                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
306                }
307            }
308
309            // ============================================
310            // ARRAY OPERATORS
311            // ============================================
312            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
313            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
314            Expression::ArrayContainedBy(op) => {
315                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
316                    left: op.right,
317                    right: op.left,
318                    left_comments: Vec::new(),
319                    operator_comments: Vec::new(),
320                    trailing_comments: Vec::new(),
321                    inferred_type: None,
322                })))
323            }
324
325            // ============================================
326            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
327            // ============================================
328            // RegexpLike -> keep as-is, generator handles ~ operator output
329            Expression::RegexpLike(f) => {
330                // Generator will output as: expr ~ pattern
331                Ok(Expression::RegexpLike(f))
332            }
333
334            // ============================================
335            // DATE/TIME FUNCTIONS
336            // ============================================
337            // DateAdd -> date + INTERVAL in PostgreSQL
338            Expression::DateAdd(f) => {
339                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
340                let right_expr = if is_literal {
341                    // Literal value: INTERVAL 'value' unit
342                    Expression::Interval(Box::new(Interval {
343                        this: Some(f.interval),
344                        unit: Some(IntervalUnitSpec::Simple {
345                            unit: f.unit,
346                            use_plural: false,
347                        }),
348                    }))
349                } else {
350                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
351                    let unit_str = match f.unit {
352                        IntervalUnit::Year => "YEAR",
353                        IntervalUnit::Quarter => "QUARTER",
354                        IntervalUnit::Month => "MONTH",
355                        IntervalUnit::Week => "WEEK",
356                        IntervalUnit::Day => "DAY",
357                        IntervalUnit::Hour => "HOUR",
358                        IntervalUnit::Minute => "MINUTE",
359                        IntervalUnit::Second => "SECOND",
360                        IntervalUnit::Millisecond => "MILLISECOND",
361                        IntervalUnit::Microsecond => "MICROSECOND",
362                        IntervalUnit::Nanosecond => "NANOSECOND",
363                    };
364                    let interval_one = Expression::Interval(Box::new(Interval {
365                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
366                            "1 {unit_str}"
367                        ))))),
368                        unit: None,
369                    }));
370                    Expression::Mul(Box::new(BinaryOp {
371                        left: interval_one,
372                        right: f.interval,
373                        left_comments: Vec::new(),
374                        operator_comments: Vec::new(),
375                        trailing_comments: Vec::new(),
376                        inferred_type: None,
377                    }))
378                };
379                Ok(Expression::Add(Box::new(BinaryOp {
380                    left: f.this,
381                    right: right_expr,
382                    left_comments: Vec::new(),
383                    operator_comments: Vec::new(),
384                    trailing_comments: Vec::new(),
385                    inferred_type: None,
386                })))
387            }
388
389            // DateSub -> date - INTERVAL in PostgreSQL
390            Expression::DateSub(f) => {
391                let interval_expr = Expression::Interval(Box::new(Interval {
392                    this: Some(f.interval),
393                    unit: Some(IntervalUnitSpec::Simple {
394                        unit: f.unit,
395                        use_plural: false,
396                    }),
397                }));
398                Ok(Expression::Sub(Box::new(BinaryOp {
399                    left: f.this,
400                    right: interval_expr,
401                    left_comments: Vec::new(),
402                    operator_comments: Vec::new(),
403                    trailing_comments: Vec::new(),
404                    inferred_type: None,
405                })))
406            }
407
408            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
409            Expression::DateDiff(f) => {
410                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
411                // matching the 3-arg string-based DATEDIFF handler below
412                let unit = f.unit.unwrap_or(IntervalUnit::Day);
413
414                // Helper: CAST(expr AS TIMESTAMP)
415                let cast_ts = |e: Expression| -> Expression {
416                    Expression::Cast(Box::new(Cast {
417                        this: e,
418                        to: DataType::Timestamp {
419                            precision: None,
420                            timezone: false,
421                        },
422                        trailing_comments: Vec::new(),
423                        double_colon_syntax: false,
424                        format: None,
425                        default: None,
426                        inferred_type: None,
427                    }))
428                };
429
430                // Helper: CAST(expr AS BIGINT)
431                let cast_bigint = |e: Expression| -> Expression {
432                    Expression::Cast(Box::new(Cast {
433                        this: e,
434                        to: DataType::BigInt { length: None },
435                        trailing_comments: Vec::new(),
436                        double_colon_syntax: false,
437                        format: None,
438                        default: None,
439                        inferred_type: None,
440                    }))
441                };
442
443                // Clone end/start for reuse
444                let end_expr = f.this;
445                let start = f.expression;
446
447                // Helper: end_ts - start_ts
448                let ts_diff = || -> Expression {
449                    Expression::Sub(Box::new(BinaryOp::new(
450                        cast_ts(end_expr.clone()),
451                        cast_ts(start.clone()),
452                    )))
453                };
454
455                // Helper: AGE(end_ts, start_ts)
456                let age_call = || -> Expression {
457                    Expression::Function(Box::new(Function::new(
458                        "AGE".to_string(),
459                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
460                    )))
461                };
462
463                // Helper: EXTRACT(field FROM expr)
464                let extract = |field: DateTimeField, from: Expression| -> Expression {
465                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
466                };
467
468                // Helper: number literal
469                let num = |n: i64| -> Expression {
470                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
471                };
472
473                let epoch_field = DateTimeField::Custom("epoch".to_string());
474
475                let result = match unit {
476                    IntervalUnit::Nanosecond => {
477                        let epoch = extract(epoch_field.clone(), ts_diff());
478                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
479                            epoch,
480                            num(1000000000),
481                        ))))
482                    }
483                    IntervalUnit::Microsecond => {
484                        let epoch = extract(epoch_field, ts_diff());
485                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
486                            epoch,
487                            num(1000000),
488                        ))))
489                    }
490                    IntervalUnit::Millisecond => {
491                        let epoch = extract(epoch_field, ts_diff());
492                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
493                    }
494                    IntervalUnit::Second => {
495                        let epoch = extract(epoch_field, ts_diff());
496                        cast_bigint(epoch)
497                    }
498                    IntervalUnit::Minute => {
499                        let epoch = extract(epoch_field, ts_diff());
500                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
501                    }
502                    IntervalUnit::Hour => {
503                        let epoch = extract(epoch_field, ts_diff());
504                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
505                    }
506                    IntervalUnit::Day => {
507                        let epoch = extract(epoch_field, ts_diff());
508                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
509                    }
510                    IntervalUnit::Week => {
511                        let diff_parens = Expression::Paren(Box::new(Paren {
512                            this: ts_diff(),
513                            trailing_comments: Vec::new(),
514                        }));
515                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
516                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
517                    }
518                    IntervalUnit::Month => {
519                        let year_part =
520                            extract(DateTimeField::Custom("year".to_string()), age_call());
521                        let month_part =
522                            extract(DateTimeField::Custom("month".to_string()), age_call());
523                        let year_months =
524                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
525                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
526                            year_months,
527                            month_part,
528                        ))))
529                    }
530                    IntervalUnit::Quarter => {
531                        let year_part =
532                            extract(DateTimeField::Custom("year".to_string()), age_call());
533                        let month_part =
534                            extract(DateTimeField::Custom("month".to_string()), age_call());
535                        let year_quarters =
536                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
537                        let month_quarters =
538                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
539                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
540                            year_quarters,
541                            month_quarters,
542                        ))))
543                    }
544                    IntervalUnit::Year => cast_bigint(extract(
545                        DateTimeField::Custom("year".to_string()),
546                        age_call(),
547                    )),
548                };
549                Ok(result)
550            }
551
552            // UnixToTime -> TO_TIMESTAMP
553            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
554                "TO_TIMESTAMP".to_string(),
555                vec![*f.this],
556            )))),
557
558            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
559            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
560                "DATE_PART".to_string(),
561                vec![Expression::string("epoch"), f.this],
562            )))),
563
564            // StrToTime -> TO_TIMESTAMP in PostgreSQL
565            Expression::ToTimestamp(f) => {
566                let mut args = vec![f.this];
567                if let Some(fmt) = f.format {
568                    args.push(fmt);
569                }
570                Ok(Expression::Function(Box::new(Function::new(
571                    "TO_TIMESTAMP".to_string(),
572                    args,
573                ))))
574            }
575
576            // StrToDate -> TO_DATE in PostgreSQL
577            Expression::ToDate(f) => {
578                let mut args = vec![f.this];
579                if let Some(fmt) = f.format {
580                    args.push(fmt);
581                }
582                Ok(Expression::Function(Box::new(Function::new(
583                    "TO_DATE".to_string(),
584                    args,
585                ))))
586            }
587
588            // TimestampTrunc -> DATE_TRUNC
589            Expression::TimestampTrunc(f) => {
590                // Convert DateTimeField to string expression for DATE_TRUNC
591                let unit_str = format!("{:?}", f.unit).to_lowercase();
592                let args = vec![Expression::string(&unit_str), f.this];
593                Ok(Expression::Function(Box::new(Function::new(
594                    "DATE_TRUNC".to_string(),
595                    args,
596                ))))
597            }
598
599            // TimeFromParts -> MAKE_TIME
600            Expression::TimeFromParts(f) => {
601                let mut args = Vec::new();
602                if let Some(h) = f.hour {
603                    args.push(*h);
604                }
605                if let Some(m) = f.min {
606                    args.push(*m);
607                }
608                if let Some(s) = f.sec {
609                    args.push(*s);
610                }
611                Ok(Expression::Function(Box::new(Function::new(
612                    "MAKE_TIME".to_string(),
613                    args,
614                ))))
615            }
616
617            // TimestampFromParts -> MAKE_TIMESTAMP
618            Expression::MakeTimestamp(f) => {
619                // MakeTimestampFunc has direct Expression fields, not Options
620                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
621                Ok(Expression::Function(Box::new(Function::new(
622                    "MAKE_TIMESTAMP".to_string(),
623                    args,
624                ))))
625            }
626
627            // ============================================
628            // STRING FUNCTIONS
629            // ============================================
630            // StringAgg is native to PostgreSQL - keep as-is
631            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
632
633            // GroupConcat -> STRING_AGG in PostgreSQL
634            Expression::GroupConcat(f) => {
635                let mut args = vec![f.this.clone()];
636                if let Some(sep) = f.separator.clone() {
637                    args.push(sep);
638                } else {
639                    args.push(Expression::string(","));
640                }
641                Ok(Expression::Function(Box::new(Function::new(
642                    "STRING_AGG".to_string(),
643                    args,
644                ))))
645            }
646
647            // StrPosition -> POSITION function
648            Expression::Position(f) => {
649                // PostgreSQL: POSITION(substring IN string)
650                // Keep as Position, generator handles it
651                Ok(Expression::Position(f))
652            }
653
654            // ============================================
655            // AGGREGATE FUNCTIONS
656            // ============================================
657            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
658            Expression::CountIf(f) => {
659                let case_expr = Expression::Case(Box::new(Case {
660                    operand: None,
661                    whens: vec![(f.this.clone(), Expression::number(1))],
662                    else_: Some(Expression::number(0)),
663                    comments: Vec::new(),
664                    inferred_type: None,
665                }));
666                Ok(Expression::Sum(Box::new(AggFunc {
667                    ignore_nulls: None,
668                    having_max: None,
669                    this: case_expr,
670                    distinct: f.distinct,
671                    filter: f.filter,
672                    order_by: Vec::new(),
673                    name: None,
674                    limit: None,
675                    inferred_type: None,
676                })))
677            }
678
679            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
680            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
681
682            // Variance -> VAR_SAMP in PostgreSQL
683            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
684                "VAR_SAMP".to_string(),
685                vec![f.this],
686            )))),
687
688            // VarPop -> VAR_POP in PostgreSQL
689            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
690                "VAR_POP".to_string(),
691                vec![f.this],
692            )))),
693
694            // ============================================
695            // JSON FUNCTIONS
696            // ============================================
697            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
698            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
699            Expression::JsonExtract(mut f) => {
700                // Only use arrow syntax for simple literal paths (string or non-negative number)
701                // Complex expressions like column references should use function form
702                f.arrow_syntax = Self::is_simple_json_path(&f.path);
703                Ok(Expression::JsonExtract(f))
704            }
705
706            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
707            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
708            // #>> (hash_arrow_syntax) stays as #>>
709            Expression::JsonExtractScalar(mut f) => {
710                if !f.hash_arrow_syntax {
711                    // Only use arrow syntax for simple literal paths (string or non-negative number)
712                    // Complex expressions like Neg(-1) should use function form
713                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
714                }
715                Ok(Expression::JsonExtractScalar(f))
716            }
717
718            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
719
720            // JSONObjectAgg -> JSON_OBJECT_AGG
721            Expression::JsonObjectAgg(f) => {
722                // JsonObjectAggFunc has key and value as Expression, not Option
723                let args = vec![f.key, f.value];
724                Ok(Expression::Function(Box::new(Function::new(
725                    "JSON_OBJECT_AGG".to_string(),
726                    args,
727                ))))
728            }
729
730            // JSONArrayAgg -> JSON_AGG
731            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
732                "JSON_AGG".to_string(),
733                vec![f.this],
734            )))),
735
736            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
737            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
738                String::new(),
739            )))),
740
741            // ============================================
742            // MISC FUNCTIONS
743            // ============================================
744            // IntDiv -> DIV in PostgreSQL
745            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
746                "DIV".to_string(),
747                vec![f.this, f.expression],
748            )))),
749
750            // Unicode -> ASCII in PostgreSQL
751            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
752                "ASCII".to_string(),
753                vec![f.this],
754            )))),
755
756            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
757            Expression::LastDay(f) => {
758                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
759                let truncated = Expression::Function(Box::new(Function::new(
760                    "DATE_TRUNC".to_string(),
761                    vec![Expression::string("month"), f.this.clone()],
762                )));
763                let plus_month = Expression::Add(Box::new(BinaryOp {
764                    left: truncated,
765                    right: Expression::Interval(Box::new(Interval {
766                        this: Some(Expression::string("1")),
767                        unit: Some(IntervalUnitSpec::Simple {
768                            unit: IntervalUnit::Month,
769                            use_plural: false,
770                        }),
771                    })),
772                    left_comments: Vec::new(),
773                    operator_comments: Vec::new(),
774                    trailing_comments: Vec::new(),
775                    inferred_type: None,
776                }));
777                let minus_day = Expression::Sub(Box::new(BinaryOp {
778                    left: plus_month,
779                    right: Expression::Interval(Box::new(Interval {
780                        this: Some(Expression::string("1")),
781                        unit: Some(IntervalUnitSpec::Simple {
782                            unit: IntervalUnit::Day,
783                            use_plural: false,
784                        }),
785                    })),
786                    left_comments: Vec::new(),
787                    operator_comments: Vec::new(),
788                    trailing_comments: Vec::new(),
789                    inferred_type: None,
790                }));
791                Ok(Expression::Cast(Box::new(Cast {
792                    this: minus_day,
793                    to: DataType::Date,
794                    trailing_comments: Vec::new(),
795                    double_colon_syntax: true, // Use PostgreSQL :: syntax
796                    format: None,
797                    default: None,
798                    inferred_type: None,
799                })))
800            }
801
802            // GenerateSeries is native to PostgreSQL
803            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
804
805            // ExplodingGenerateSeries -> GENERATE_SERIES
806            Expression::ExplodingGenerateSeries(f) => {
807                let mut args = vec![f.start, f.stop];
808                if let Some(step) = f.step {
809                    args.push(step); // step is Expression, not Box<Expression>
810                }
811                Ok(Expression::Function(Box::new(Function::new(
812                    "GENERATE_SERIES".to_string(),
813                    args,
814                ))))
815            }
816
817            // ============================================
818            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
819            // ============================================
820            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
821            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
822                name: "CURRENT_TIMESTAMP".to_string(),
823                args: vec![],
824                distinct: false,
825                trailing_comments: vec![],
826                use_bracket_syntax: false,
827                no_parens: true,
828                quoted: false,
829                span: None,
830                inferred_type: None,
831            }))),
832
833            // CurrentUser -> CURRENT_USER (no parens)
834            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
835                "CURRENT_USER".to_string(),
836                vec![],
837            )))),
838
839            // CurrentDate -> CURRENT_DATE (no parens)
840            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
841                name: "CURRENT_DATE".to_string(),
842                args: vec![],
843                distinct: false,
844                trailing_comments: vec![],
845                use_bracket_syntax: false,
846                no_parens: true,
847                quoted: false,
848                span: None,
849                inferred_type: None,
850            }))),
851
852            // ============================================
853            // JOIN TRANSFORMATIONS
854            // ============================================
855            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
856            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
857                Ok(Expression::Join(Box::new(Join {
858                    this: join.this,
859                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
860                    using: join.using,
861                    kind: JoinKind::CrossApply,
862                    use_inner_keyword: false,
863                    use_outer_keyword: false,
864                    deferred_condition: false,
865                    join_hint: None,
866                    match_condition: None,
867                    pivots: join.pivots,
868                    comments: join.comments,
869                    nesting_group: 0,
870                    directed: false,
871                })))
872            }
873
874            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
875            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
876                Ok(Expression::Join(Box::new(Join {
877                    this: join.this,
878                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
879                    using: join.using,
880                    kind: JoinKind::OuterApply,
881                    use_inner_keyword: false,
882                    use_outer_keyword: false,
883                    deferred_condition: false,
884                    join_hint: None,
885                    match_condition: None,
886                    pivots: join.pivots,
887                    comments: join.comments,
888                    nesting_group: 0,
889                    directed: false,
890                })))
891            }
892
893            // ============================================
894            // GENERIC FUNCTION TRANSFORMATIONS
895            // ============================================
896            Expression::Function(f) => self.transform_function(*f),
897
898            // Generic aggregate function transformations
899            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
900
901            // ===== Context-aware JSON arrow wrapping =====
902            // When JSON arrow expressions appear in Binary/In/Not contexts,
903            // they need to be wrapped in parentheses for correct precedence.
904            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
905
906            // Binary operators that need JSON wrapping
907            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
908                left: wrap_if_json_arrow(op.left),
909                right: wrap_if_json_arrow(op.right),
910                ..*op
911            }))),
912            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
913                left: wrap_if_json_arrow(op.left),
914                right: wrap_if_json_arrow(op.right),
915                ..*op
916            }))),
917            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
918                left: wrap_if_json_arrow(op.left),
919                right: wrap_if_json_arrow(op.right),
920                ..*op
921            }))),
922            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
923                left: wrap_if_json_arrow(op.left),
924                right: wrap_if_json_arrow(op.right),
925                ..*op
926            }))),
927            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
928                left: wrap_if_json_arrow(op.left),
929                right: wrap_if_json_arrow(op.right),
930                ..*op
931            }))),
932            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
933                left: wrap_if_json_arrow(op.left),
934                right: wrap_if_json_arrow(op.right),
935                ..*op
936            }))),
937
938            // In expression - wrap the this part if it's JSON arrow
939            Expression::In(mut i) => {
940                i.this = wrap_if_json_arrow(i.this);
941                Ok(Expression::In(i))
942            }
943
944            // Not expression - wrap the this part if it's JSON arrow
945            Expression::Not(mut n) => {
946                n.this = wrap_if_json_arrow(n.this);
947                Ok(Expression::Not(n))
948            }
949
950            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
951            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
952            Expression::Merge(m) => Ok(Expression::Merge(m)),
953
954            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
955            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
956                // Convert path from bracketed format to simple key
957                // e.g., '["fr''uit"]' -> 'fr''uit'
958                let path = match *je.expression {
959                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
960                        let Literal::String(s) = lit.as_ref() else {
961                            unreachable!()
962                        };
963                        // Strip bracketed JSON path format: ["key"] -> key
964                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
965                            s[2..s.len() - 2].to_string()
966                        } else {
967                            s.clone()
968                        };
969                        Expression::Literal(Box::new(Literal::String(cleaned)))
970                    }
971                    other => other,
972                };
973                Ok(Expression::Function(Box::new(Function::new(
974                    "JSON_EXTRACT_PATH".to_string(),
975                    vec![*je.this, path],
976                ))))
977            }
978
979            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
980            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
981                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
982                    this: t.this,
983                    characters: t.characters,
984                    position: t.position,
985                    sql_standard_syntax: true,
986                    position_explicit: t.position_explicit,
987                })))
988            }
989
990            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
991            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
992                let Literal::ByteString(s) = lit.as_ref() else {
993                    unreachable!()
994                };
995                Ok(Expression::Cast(Box::new(Cast {
996                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
997                    to: DataType::VarBinary { length: None },
998                    trailing_comments: Vec::new(),
999                    double_colon_syntax: false,
1000                    format: None,
1001                    default: None,
1002                    inferred_type: None,
1003                })))
1004            }
1005
1006            // Pass through everything else
1007            _ => Ok(expr),
1008        }
1009    }
1010}
1011
1012#[cfg(feature = "transpile")]
1013impl PostgresDialect {
1014    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
1015    /// Simple paths can use arrow syntax (->>) in PostgreSQL
1016    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
1017    fn is_simple_json_path(path: &Expression) -> bool {
1018        match path {
1019            // String literals are always simple
1020            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
1021            // Non-negative integer literals are simple
1022            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1023                let Literal::Number(n) = lit.as_ref() else {
1024                    unreachable!()
1025                };
1026                // Check if it's non-negative
1027                !n.starts_with('-')
1028            }
1029            // JSONPath expressions are simple (they're already parsed paths)
1030            Expression::JSONPath(_) => true,
1031            // Everything else (Neg, function calls, etc.) is complex
1032            _ => false,
1033        }
1034    }
1035
1036    /// Transform data types according to PostgreSQL TYPE_MAPPING
1037    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1038        let transformed = match dt {
1039            // TINYINT -> SMALLINT
1040            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1041
1042            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1043            DataType::Float { .. } => DataType::Custom {
1044                name: "DOUBLE PRECISION".to_string(),
1045            },
1046
1047            // DOUBLE -> DOUBLE PRECISION
1048            DataType::Double { .. } => DataType::Custom {
1049                name: "DOUBLE PRECISION".to_string(),
1050            },
1051
1052            // BINARY -> BYTEA (handled by generator which preserves length)
1053            DataType::Binary { .. } => dt,
1054
1055            // VARBINARY -> BYTEA (handled by generator which preserves length)
1056            DataType::VarBinary { .. } => dt,
1057
1058            // BLOB -> BYTEA
1059            DataType::Blob => DataType::Custom {
1060                name: "BYTEA".to_string(),
1061            },
1062
1063            // Custom type normalizations
1064            DataType::Custom { ref name } => {
1065                let upper = name.to_uppercase();
1066                match upper.as_str() {
1067                    // INT8 -> BIGINT (PostgreSQL alias)
1068                    "INT8" => DataType::BigInt { length: None },
1069                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1070                    "FLOAT8" => DataType::Custom {
1071                        name: "DOUBLE PRECISION".to_string(),
1072                    },
1073                    // FLOAT4 -> REAL (PostgreSQL alias)
1074                    "FLOAT4" => DataType::Custom {
1075                        name: "REAL".to_string(),
1076                    },
1077                    // INT4 -> INTEGER (PostgreSQL alias)
1078                    "INT4" => DataType::Int {
1079                        length: None,
1080                        integer_spelling: false,
1081                    },
1082                    // INT2 -> SMALLINT (PostgreSQL alias)
1083                    "INT2" => DataType::SmallInt { length: None },
1084                    _ => dt,
1085                }
1086            }
1087
1088            // Keep all other types as-is
1089            other => other,
1090        };
1091        Ok(Expression::DataType(transformed))
1092    }
1093
1094    fn transform_function(&self, f: Function) -> Result<Expression> {
1095        let name_upper = f.name.to_uppercase();
1096        match name_upper.as_str() {
1097            // IFNULL -> COALESCE
1098            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1099                original_name: None,
1100                expressions: f.args,
1101                inferred_type: None,
1102            }))),
1103
1104            // NVL -> COALESCE
1105            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1106                original_name: None,
1107                expressions: f.args,
1108                inferred_type: None,
1109            }))),
1110
1111            // ISNULL (SQL Server) -> COALESCE
1112            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1113                original_name: None,
1114                expressions: f.args,
1115                inferred_type: None,
1116            }))),
1117
1118            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1119            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1120                Function::new("STRING_AGG".to_string(), f.args),
1121            ))),
1122
1123            // SUBSTR -> SUBSTRING (standard SQL)
1124            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1125                "SUBSTRING".to_string(),
1126                f.args,
1127            )))),
1128
1129            // RAND -> RANDOM in PostgreSQL
1130            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1131
1132            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1133            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1134                this: f.args.into_iter().next().unwrap(),
1135                decimals: None,
1136                to: None,
1137            }))),
1138
1139            // LEN -> LENGTH in PostgreSQL
1140            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1141                this: f.args.into_iter().next().unwrap(),
1142                original_name: None,
1143                inferred_type: None,
1144            }))),
1145
1146            // CHAR_LENGTH -> LENGTH in PostgreSQL
1147            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1148                this: f.args.into_iter().next().unwrap(),
1149                original_name: None,
1150                inferred_type: None,
1151            }))),
1152
1153            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1154            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1155                Ok(Expression::Length(Box::new(UnaryFunc {
1156                    this: f.args.into_iter().next().unwrap(),
1157                    original_name: None,
1158                    inferred_type: None,
1159                })))
1160            }
1161
1162            // CHARINDEX -> POSITION in PostgreSQL
1163            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1164            "CHARINDEX" if f.args.len() >= 2 => {
1165                let mut args = f.args;
1166                let substring = args.remove(0);
1167                let string = args.remove(0);
1168                Ok(Expression::Position(Box::new(
1169                    crate::expressions::PositionFunc {
1170                        substring,
1171                        string,
1172                        start: args.pop(),
1173                    },
1174                )))
1175            }
1176
1177            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1178            "GETDATE" => Ok(Expression::CurrentTimestamp(
1179                crate::expressions::CurrentTimestamp {
1180                    precision: None,
1181                    sysdate: false,
1182                },
1183            )),
1184
1185            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1186            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1187                crate::expressions::CurrentTimestamp {
1188                    precision: None,
1189                    sysdate: false,
1190                },
1191            )),
1192
1193            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1194            "NOW" => Ok(Expression::CurrentTimestamp(
1195                crate::expressions::CurrentTimestamp {
1196                    precision: None,
1197                    sysdate: false,
1198                },
1199            )),
1200
1201            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1202            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1203                "GEN_RANDOM_UUID".to_string(),
1204                vec![],
1205            )))),
1206
1207            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1208            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1209                "GEN_RANDOM_UUID".to_string(),
1210                vec![],
1211            )))),
1212
1213            // UNNEST is native to PostgreSQL
1214            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1215
1216            // GENERATE_SERIES is native to PostgreSQL
1217            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1218
1219            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1220            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1221                "SHA256".to_string(),
1222                f.args,
1223            )))),
1224
1225            // SHA2 -> SHA256/SHA512 based on length argument
1226            "SHA2" if f.args.len() == 2 => {
1227                // SHA2(data, length) -> SHA256/SHA384/SHA512
1228                let args = f.args;
1229                let data = args[0].clone();
1230                // Default to SHA256 - would need runtime inspection for exact mapping
1231                Ok(Expression::Function(Box::new(Function::new(
1232                    "SHA256".to_string(),
1233                    vec![data],
1234                ))))
1235            }
1236
1237            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1238            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1239
1240            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1241            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1242                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1243            ))),
1244            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1245                Function::new("LEVENSHTEIN".to_string(), f.args),
1246            ))),
1247
1248            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1249            "TRIM" if f.args.len() == 2 => {
1250                let value = f.args[0].clone();
1251                let chars = f.args[1].clone();
1252                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1253                    this: value,
1254                    characters: Some(chars),
1255                    position: crate::expressions::TrimPosition::Both,
1256                    sql_standard_syntax: true,
1257                    position_explicit: false,
1258                })))
1259            }
1260
1261            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1262            "DATEDIFF" if f.args.len() >= 2 => {
1263                let mut args = f.args;
1264                if args.len() == 2 {
1265                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1266                    let first = args.remove(0);
1267                    let second = args.remove(0);
1268                    Ok(Expression::Function(Box::new(Function::new(
1269                        "AGE".to_string(),
1270                        vec![first, second],
1271                    ))))
1272                } else {
1273                    // 3-arg form: DATEDIFF(unit, start, end)
1274                    let unit_expr = args.remove(0);
1275                    let start = args.remove(0);
1276                    let end_expr = args.remove(0);
1277
1278                    // Extract unit name from identifier or column
1279                    let unit_name = match &unit_expr {
1280                        Expression::Identifier(id) => id.name.to_uppercase(),
1281                        Expression::Var(v) => v.this.to_uppercase(),
1282                        Expression::Column(col) if col.table.is_none() => {
1283                            col.name.name.to_uppercase()
1284                        }
1285                        _ => "DAY".to_string(),
1286                    };
1287
1288                    // Helper: CAST(expr AS TIMESTAMP)
1289                    let cast_ts = |e: Expression| -> Expression {
1290                        Expression::Cast(Box::new(Cast {
1291                            this: e,
1292                            to: DataType::Timestamp {
1293                                precision: None,
1294                                timezone: false,
1295                            },
1296                            trailing_comments: Vec::new(),
1297                            double_colon_syntax: false,
1298                            format: None,
1299                            default: None,
1300                            inferred_type: None,
1301                        }))
1302                    };
1303
1304                    // Helper: CAST(expr AS BIGINT)
1305                    let cast_bigint = |e: Expression| -> Expression {
1306                        Expression::Cast(Box::new(Cast {
1307                            this: e,
1308                            to: DataType::BigInt { length: None },
1309                            trailing_comments: Vec::new(),
1310                            double_colon_syntax: false,
1311                            format: None,
1312                            default: None,
1313                            inferred_type: None,
1314                        }))
1315                    };
1316
1317                    let end_ts = cast_ts(end_expr.clone());
1318                    let start_ts = cast_ts(start.clone());
1319
1320                    // Helper: end_ts - start_ts
1321                    let ts_diff = || -> Expression {
1322                        Expression::Sub(Box::new(BinaryOp::new(
1323                            cast_ts(end_expr.clone()),
1324                            cast_ts(start.clone()),
1325                        )))
1326                    };
1327
1328                    // Helper: AGE(end_ts, start_ts)
1329                    let age_call = || -> Expression {
1330                        Expression::Function(Box::new(Function::new(
1331                            "AGE".to_string(),
1332                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1333                        )))
1334                    };
1335
1336                    // Helper: EXTRACT(field FROM expr)
1337                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1338                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1339                    };
1340
1341                    // Helper: number literal
1342                    let num = |n: i64| -> Expression {
1343                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1344                    };
1345
1346                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1347                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1348
1349                    let result = match unit_name.as_str() {
1350                        "MICROSECOND" => {
1351                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1352                            let epoch = extract(epoch_field, ts_diff());
1353                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1354                                epoch,
1355                                num(1000000),
1356                            ))))
1357                        }
1358                        "MILLISECOND" => {
1359                            let epoch = extract(epoch_field, ts_diff());
1360                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1361                        }
1362                        "SECOND" => {
1363                            let epoch = extract(epoch_field, ts_diff());
1364                            cast_bigint(epoch)
1365                        }
1366                        "MINUTE" => {
1367                            let epoch = extract(epoch_field, ts_diff());
1368                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1369                        }
1370                        "HOUR" => {
1371                            let epoch = extract(epoch_field, ts_diff());
1372                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1373                        }
1374                        "DAY" => {
1375                            let epoch = extract(epoch_field, ts_diff());
1376                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1377                        }
1378                        "WEEK" => {
1379                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1380                            let diff_parens = Expression::Paren(Box::new(Paren {
1381                                this: ts_diff(),
1382                                trailing_comments: Vec::new(),
1383                            }));
1384                            let days =
1385                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1386                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1387                        }
1388                        "MONTH" => {
1389                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1390                            let year_part =
1391                                extract(DateTimeField::Custom("year".to_string()), age_call());
1392                            let month_part =
1393                                extract(DateTimeField::Custom("month".to_string()), age_call());
1394                            let year_months =
1395                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1396                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1397                                year_months,
1398                                month_part,
1399                            ))))
1400                        }
1401                        "QUARTER" => {
1402                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1403                            let year_part =
1404                                extract(DateTimeField::Custom("year".to_string()), age_call());
1405                            let month_part =
1406                                extract(DateTimeField::Custom("month".to_string()), age_call());
1407                            let year_quarters =
1408                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1409                            let month_quarters =
1410                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1411                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1412                                year_quarters,
1413                                month_quarters,
1414                            ))))
1415                        }
1416                        "YEAR" => {
1417                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1418                            cast_bigint(extract(
1419                                DateTimeField::Custom("year".to_string()),
1420                                age_call(),
1421                            ))
1422                        }
1423                        _ => {
1424                            // Fallback: simple AGE
1425                            Expression::Function(Box::new(Function::new(
1426                                "AGE".to_string(),
1427                                vec![end_ts, start_ts],
1428                            )))
1429                        }
1430                    };
1431                    Ok(result)
1432                }
1433            }
1434
1435            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1436            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1437                let mut args = f.args;
1438                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1439                let start = args.remove(0);
1440                let end = args.remove(0);
1441                Ok(Expression::Function(Box::new(Function::new(
1442                    "AGE".to_string(),
1443                    vec![end, start],
1444                ))))
1445            }
1446
1447            // FROM_UNIXTIME -> TO_TIMESTAMP
1448            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1449                "TO_TIMESTAMP".to_string(),
1450                f.args,
1451            )))),
1452
1453            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1454            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1455                let arg = f.args.into_iter().next().unwrap();
1456                Ok(Expression::Function(Box::new(Function::new(
1457                    "DATE_PART".to_string(),
1458                    vec![Expression::string("epoch"), arg],
1459                ))))
1460            }
1461
1462            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1463            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1464                Ok(Expression::Function(Box::new(Function::new(
1465                    "DATE_PART".to_string(),
1466                    vec![
1467                        Expression::string("epoch"),
1468                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1469                            precision: None,
1470                            sysdate: false,
1471                        }),
1472                    ],
1473                ))))
1474            }
1475
1476            // DATEADD -> date + interval pattern
1477            "DATEADD" if f.args.len() == 3 => {
1478                // DATEADD(unit, count, date) -> date + interval 'count unit'
1479                // This is a simplified version - full impl would construct proper interval
1480                let mut args = f.args;
1481                let _unit = args.remove(0);
1482                let count = args.remove(0);
1483                let date = args.remove(0);
1484                Ok(Expression::Add(Box::new(BinaryOp {
1485                    left: date,
1486                    right: count,
1487                    left_comments: Vec::new(),
1488                    operator_comments: Vec::new(),
1489                    trailing_comments: Vec::new(),
1490                    inferred_type: None,
1491                })))
1492            }
1493
1494            // INSTR -> POSITION (simplified)
1495            "INSTR" if f.args.len() >= 2 => {
1496                let mut args = f.args;
1497                let string = args.remove(0);
1498                let substring = args.remove(0);
1499                Ok(Expression::Position(Box::new(
1500                    crate::expressions::PositionFunc {
1501                        substring,
1502                        string,
1503                        start: args.pop(),
1504                    },
1505                )))
1506            }
1507
1508            // CONCAT_WS is native to PostgreSQL
1509            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1510
1511            // REGEXP_REPLACE: pass through without adding 'g' flag
1512            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1513            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1514                Ok(Expression::Function(Box::new(f)))
1515            }
1516            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1517            // If occurrence is 0 (global), append 'g' to flags
1518            "REGEXP_REPLACE" if f.args.len() == 6 => {
1519                let is_global = match &f.args[4] {
1520                    Expression::Literal(lit)
1521                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1522                    {
1523                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1524                            unreachable!()
1525                        };
1526                        n == "0"
1527                    }
1528                    _ => false,
1529                };
1530                if is_global {
1531                    let subject = f.args[0].clone();
1532                    let pattern = f.args[1].clone();
1533                    let replacement = f.args[2].clone();
1534                    let position = f.args[3].clone();
1535                    let occurrence = f.args[4].clone();
1536                    let params = &f.args[5];
1537                    let mut flags = if let Expression::Literal(lit) = params {
1538                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1539                            s.clone()
1540                        } else {
1541                            String::new()
1542                        }
1543                    } else {
1544                        String::new()
1545                    };
1546                    if !flags.contains('g') {
1547                        flags.push('g');
1548                    }
1549                    Ok(Expression::Function(Box::new(Function::new(
1550                        "REGEXP_REPLACE".to_string(),
1551                        vec![
1552                            subject,
1553                            pattern,
1554                            replacement,
1555                            position,
1556                            occurrence,
1557                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1558                                flags,
1559                            ))),
1560                        ],
1561                    ))))
1562                } else {
1563                    Ok(Expression::Function(Box::new(f)))
1564                }
1565            }
1566            // Default: pass through
1567            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1568
1569            // Pass through everything else
1570            _ => Ok(Expression::Function(Box::new(f))),
1571        }
1572    }
1573
1574    fn transform_aggregate_function(
1575        &self,
1576        f: Box<crate::expressions::AggregateFunction>,
1577    ) -> Result<Expression> {
1578        let name_upper = f.name.to_uppercase();
1579        match name_upper.as_str() {
1580            // COUNT_IF -> SUM(CASE WHEN...)
1581            "COUNT_IF" if !f.args.is_empty() => {
1582                let condition = f.args.into_iter().next().unwrap();
1583                let case_expr = Expression::Case(Box::new(Case {
1584                    operand: None,
1585                    whens: vec![(condition, Expression::number(1))],
1586                    else_: Some(Expression::number(0)),
1587                    comments: Vec::new(),
1588                    inferred_type: None,
1589                }));
1590                Ok(Expression::Sum(Box::new(AggFunc {
1591                    ignore_nulls: None,
1592                    having_max: None,
1593                    this: case_expr,
1594                    distinct: f.distinct,
1595                    filter: f.filter,
1596                    order_by: Vec::new(),
1597                    name: None,
1598                    limit: None,
1599                    inferred_type: None,
1600                })))
1601            }
1602
1603            // GROUP_CONCAT -> STRING_AGG
1604            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1605                Function::new("STRING_AGG".to_string(), f.args),
1606            ))),
1607
1608            // STDEV -> STDDEV in PostgreSQL
1609            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1610                ignore_nulls: None,
1611                having_max: None,
1612                this: f.args.into_iter().next().unwrap(),
1613                distinct: f.distinct,
1614                filter: f.filter,
1615                order_by: Vec::new(),
1616                name: None,
1617                limit: None,
1618                inferred_type: None,
1619            }))),
1620
1621            // STDEVP -> STDDEV_POP in PostgreSQL
1622            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1623                ignore_nulls: None,
1624                having_max: None,
1625                this: f.args.into_iter().next().unwrap(),
1626                distinct: f.distinct,
1627                filter: f.filter,
1628                order_by: Vec::new(),
1629                name: None,
1630                limit: None,
1631                inferred_type: None,
1632            }))),
1633
1634            // VAR -> VAR_SAMP in PostgreSQL
1635            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1636                ignore_nulls: None,
1637                having_max: None,
1638                this: f.args.into_iter().next().unwrap(),
1639                distinct: f.distinct,
1640                filter: f.filter,
1641                order_by: Vec::new(),
1642                name: None,
1643                limit: None,
1644                inferred_type: None,
1645            }))),
1646
1647            // VARP -> VAR_POP in PostgreSQL
1648            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1649                ignore_nulls: None,
1650                having_max: None,
1651                this: f.args.into_iter().next().unwrap(),
1652                distinct: f.distinct,
1653                filter: f.filter,
1654                order_by: Vec::new(),
1655                name: None,
1656                limit: None,
1657                inferred_type: None,
1658            }))),
1659
1660            // BIT_AND is native to PostgreSQL
1661            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1662
1663            // BIT_OR is native to PostgreSQL
1664            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1665
1666            // BIT_XOR is native to PostgreSQL
1667            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1668
1669            // BOOL_AND is native to PostgreSQL
1670            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1671
1672            // BOOL_OR is native to PostgreSQL
1673            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1674
1675            // VARIANCE -> VAR_SAMP in PostgreSQL
1676            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1677                ignore_nulls: None,
1678                having_max: None,
1679                this: f.args.into_iter().next().unwrap(),
1680                distinct: f.distinct,
1681                filter: f.filter,
1682                order_by: Vec::new(),
1683                name: None,
1684                limit: None,
1685                inferred_type: None,
1686            }))),
1687
1688            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1689            "LOGICAL_OR" if !f.args.is_empty() => {
1690                let mut new_agg = f.clone();
1691                new_agg.name = "BOOL_OR".to_string();
1692                Ok(Expression::AggregateFunction(new_agg))
1693            }
1694
1695            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1696            "LOGICAL_AND" if !f.args.is_empty() => {
1697                let mut new_agg = f.clone();
1698                new_agg.name = "BOOL_AND".to_string();
1699                Ok(Expression::AggregateFunction(new_agg))
1700            }
1701
1702            // Pass through everything else
1703            _ => Ok(Expression::AggregateFunction(f)),
1704        }
1705    }
1706}
1707
1708#[cfg(test)]
1709mod tests {
1710    use super::*;
1711    use crate::dialects::Dialect;
1712
1713    fn transpile_to_postgres(sql: &str) -> String {
1714        let dialect = Dialect::get(DialectType::Generic);
1715        let result = dialect
1716            .transpile(sql, DialectType::PostgreSQL)
1717            .expect("Transpile failed");
1718        result[0].clone()
1719    }
1720
1721    #[test]
1722    fn test_ifnull_to_coalesce() {
1723        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1724        assert!(
1725            result.contains("COALESCE"),
1726            "Expected COALESCE, got: {}",
1727            result
1728        );
1729    }
1730
1731    #[test]
1732    fn test_nvl_to_coalesce() {
1733        let result = transpile_to_postgres("SELECT NVL(a, b)");
1734        assert!(
1735            result.contains("COALESCE"),
1736            "Expected COALESCE, got: {}",
1737            result
1738        );
1739    }
1740
1741    #[test]
1742    fn test_rand_to_random() {
1743        let result = transpile_to_postgres("SELECT RAND()");
1744        assert!(
1745            result.contains("RANDOM"),
1746            "Expected RANDOM, got: {}",
1747            result
1748        );
1749    }
1750
1751    #[test]
1752    fn test_basic_select() {
1753        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1754        assert!(result.contains("SELECT"));
1755        assert!(result.contains("FROM users"));
1756    }
1757
1758    #[test]
1759    fn test_len_to_length() {
1760        let result = transpile_to_postgres("SELECT LEN(name)");
1761        assert!(
1762            result.contains("LENGTH"),
1763            "Expected LENGTH, got: {}",
1764            result
1765        );
1766    }
1767
1768    #[test]
1769    fn test_getdate_to_current_timestamp() {
1770        let result = transpile_to_postgres("SELECT GETDATE()");
1771        assert!(
1772            result.contains("CURRENT_TIMESTAMP"),
1773            "Expected CURRENT_TIMESTAMP, got: {}",
1774            result
1775        );
1776    }
1777
1778    #[test]
1779    fn test_substr_to_substring() {
1780        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1781        assert!(
1782            result.contains("SUBSTRING"),
1783            "Expected SUBSTRING, got: {}",
1784            result
1785        );
1786    }
1787
1788    #[test]
1789    fn test_group_concat_to_string_agg() {
1790        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1791        assert!(
1792            result.contains("STRING_AGG"),
1793            "Expected STRING_AGG, got: {}",
1794            result
1795        );
1796    }
1797
1798    #[test]
1799    fn test_double_quote_identifiers() {
1800        // PostgreSQL uses double quotes for identifiers
1801        let dialect = PostgresDialect;
1802        let config = dialect.generator_config();
1803        assert_eq!(config.identifier_quote, '"');
1804    }
1805
1806    #[test]
1807    fn test_char_length_to_length() {
1808        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1809        assert!(
1810            result.contains("LENGTH"),
1811            "Expected LENGTH, got: {}",
1812            result
1813        );
1814    }
1815
1816    #[test]
1817    fn test_character_length_to_length() {
1818        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1819        assert!(
1820            result.contains("LENGTH"),
1821            "Expected LENGTH, got: {}",
1822            result
1823        );
1824    }
1825
1826    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1827    fn identity_postgres(sql: &str) -> String {
1828        let dialect = Dialect::get(DialectType::PostgreSQL);
1829        let exprs = dialect.parse(sql).expect("Parse failed");
1830        let transformed = dialect
1831            .transform(exprs[0].clone())
1832            .expect("Transform failed");
1833        dialect.generate(&transformed).expect("Generate failed")
1834    }
1835
1836    #[test]
1837    fn test_json_extract_with_column_path() {
1838        // When the path is a column reference (not a literal), should use function form
1839        let result = identity_postgres("json_data.data -> field_ids.field_id");
1840        assert!(
1841            result.contains("JSON_EXTRACT_PATH"),
1842            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1843            result
1844        );
1845    }
1846
1847    #[test]
1848    fn test_json_extract_scalar_with_negative_index() {
1849        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1850        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1851        assert!(
1852            result.contains("JSON_EXTRACT_PATH_TEXT"),
1853            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1854            result
1855        );
1856        // The first -> should still be arrow syntax since 'duration' is a string literal
1857        assert!(
1858            result.contains("->"),
1859            "Expected -> for string literal path, got: {}",
1860            result
1861        );
1862    }
1863
1864    #[test]
1865    fn test_json_extract_with_string_literal() {
1866        // When the path is a string literal, should keep arrow syntax
1867        let result = identity_postgres("data -> 'key'");
1868        assert!(
1869            result.contains("->"),
1870            "Expected -> for string literal path, got: {}",
1871            result
1872        );
1873        assert!(
1874            !result.contains("JSON_EXTRACT_PATH"),
1875            "Should NOT use function form for string literal, got: {}",
1876            result
1877        );
1878    }
1879}