Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, AggregateFunction, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType,
22    DateTimeField, Expression, ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec,
23    Join, JoinKind, Literal, Paren, UnaryFunc, VarArgFunc,
24};
25#[cfg(feature = "generate")]
26use crate::generator::GeneratorConfig;
27use crate::tokens::TokenizerConfig;
28
29/// Helper to wrap JSON arrow expressions in parentheses when they appear
30/// in contexts that require it (Binary, In, Not expressions)
31/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
32fn wrap_if_json_arrow(expr: Expression) -> Expression {
33    match &expr {
34        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
35            this: expr,
36            trailing_comments: Vec::new(),
37        })),
38        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
39            this: expr,
40            trailing_comments: Vec::new(),
41        })),
42        _ => expr,
43    }
44}
45
46/// PostgreSQL dialect
47pub struct PostgresDialect;
48
49impl DialectImpl for PostgresDialect {
50    fn dialect_type(&self) -> DialectType {
51        DialectType::PostgreSQL
52    }
53
54    fn tokenizer_config(&self) -> TokenizerConfig {
55        use crate::tokens::TokenType;
56        let mut config = TokenizerConfig::default();
57        // PostgreSQL supports $$ string literals (heredoc strings)
58        config.quotes.insert("$$".to_string(), "$$".to_string());
59        // PostgreSQL uses double quotes for identifiers
60        config.identifiers.insert('"', '"');
61        // Nested comments supported
62        config.nested_comments = true;
63        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
64        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
65        config
66            .keywords
67            .insert("EXEC".to_string(), TokenType::Command);
68        for command in [
69            "BASE_BACKUP",
70            "CREATE_REPLICATION_SLOT",
71            "DROP_REPLICATION_SLOT",
72            "IDENTIFY_SYSTEM",
73            "READ_REPLICATION_SLOT",
74            "START_REPLICATION",
75            "TIMELINE_HISTORY",
76        ] {
77            config
78                .keywords
79                .insert(command.to_string(), TokenType::Command);
80        }
81        config
82    }
83
84    #[cfg(feature = "generate")]
85
86    fn generator_config(&self) -> GeneratorConfig {
87        use crate::generator::IdentifierQuoteStyle;
88        GeneratorConfig {
89            identifier_quote: '"',
90            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
91            dialect: Some(DialectType::PostgreSQL),
92            // PostgreSQL uses TIMESTAMPTZ shorthand
93            tz_to_with_time_zone: false,
94            // PostgreSQL prefers INTERVAL '1 day' syntax
95            single_string_interval: true,
96            // TABLESAMPLE uses REPEATABLE in PostgreSQL
97            tablesample_seed_keyword: "REPEATABLE",
98            // PostgreSQL doesn't support NVL2
99            nvl2_supported: false,
100            // PostgreSQL uses $ for parameters
101            parameter_token: "$",
102            // PostgreSQL uses % for named placeholders
103            named_placeholder_token: "%",
104            // PostgreSQL supports SELECT INTO
105            supports_select_into: true,
106            // PostgreSQL: USING btree(col) without space before parens
107            index_using_no_space: true,
108            // PostgreSQL supports UNLOGGED tables
109            supports_unlogged_tables: true,
110            // PostgreSQL doesn't support multi-arg DISTINCT
111            multi_arg_distinct: false,
112            // PostgreSQL uses ANY (subquery) with space
113            quantified_no_paren_space: false,
114            // PostgreSQL supports window EXCLUDE clause
115            supports_window_exclude: true,
116            // PostgreSQL normalizes single-bound window frames to BETWEEN form
117            normalize_window_frame_between: true,
118            // PostgreSQL COPY doesn't use INTO keyword
119            copy_has_into_keyword: false,
120            // PostgreSQL ARRAY_SIZE requires dimension argument
121            array_size_dim_required: Some(true),
122            // PostgreSQL supports BETWEEN flags
123            supports_between_flags: true,
124            // PostgreSQL doesn't support hints
125            join_hints: false,
126            table_hints: false,
127            query_hints: false,
128            // PostgreSQL supports locking reads
129            locking_reads_supported: true,
130            // PostgreSQL doesn't rename tables with DB
131            rename_table_with_db: false,
132            // PostgreSQL can implement array any
133            can_implement_array_any: true,
134            // PostgreSQL ARRAY_CONCAT is not var-len
135            array_concat_is_var_len: false,
136            // PostgreSQL doesn't support MEDIAN
137            supports_median: false,
138            // PostgreSQL requires JSON type for extraction
139            json_type_required_for_extraction: true,
140            // PostgreSQL LIKE property inside schema
141            like_property_inside_schema: true,
142            ..Default::default()
143        }
144    }
145
146    #[cfg(feature = "transpile")]
147
148    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
149        match expr {
150            // ============================================
151            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
152            // These are handled specially - transform DataType variants
153            // ============================================
154            Expression::DataType(dt) => self.transform_data_type(dt),
155
156            // ============================================
157            // NULL HANDLING
158            // ============================================
159            // IFNULL -> COALESCE in PostgreSQL
160            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
161                original_name: None,
162                expressions: vec![f.this, f.expression],
163                inferred_type: None,
164            }))),
165
166            // NVL -> COALESCE in PostgreSQL
167            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
168                original_name: None,
169                expressions: vec![f.this, f.expression],
170                inferred_type: None,
171            }))),
172
173            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
174            // so it outputs as COALESCE instead of the original function name
175            Expression::Coalesce(mut f) => {
176                f.original_name = None;
177                Ok(Expression::Coalesce(f))
178            }
179
180            // ============================================
181            // CAST OPERATIONS
182            // ============================================
183            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
184            Expression::TryCast(c) => Ok(Expression::Cast(c)),
185
186            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
187            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
188
189            // ============================================
190            // RANDOM
191            // ============================================
192            // RAND -> RANDOM in PostgreSQL
193            Expression::Rand(r) => {
194                // PostgreSQL's RANDOM() doesn't take a seed argument
195                let _ = r.seed; // Ignore seed
196                Ok(Expression::Random(crate::expressions::Random))
197            }
198
199            // ============================================
200            // UUID
201            // ============================================
202            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
203            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
204                "GEN_RANDOM_UUID".to_string(),
205                vec![],
206            )))),
207
208            // ============================================
209            // ARRAY OPERATIONS
210            // ============================================
211            // EXPLODE -> UNNEST in PostgreSQL
212            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
213                crate::expressions::UnnestFunc {
214                    this: f.this,
215                    expressions: Vec::new(),
216                    with_ordinality: false,
217                    alias: None,
218                    offset_alias: None,
219                },
220            ))),
221
222            // ExplodeOuter -> UNNEST in PostgreSQL
223            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
224                crate::expressions::UnnestFunc {
225                    this: f.this,
226                    expressions: Vec::new(),
227                    with_ordinality: false,
228                    alias: None,
229                    offset_alias: None,
230                },
231            ))),
232
233            // ArrayConcat -> ARRAY_CAT in PostgreSQL
234            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
235                "ARRAY_CAT".to_string(),
236                f.expressions,
237            )))),
238
239            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
240            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
241                "ARRAY_PREPEND".to_string(),
242                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
243            )))),
244
245            // ============================================
246            // BITWISE OPERATIONS
247            // ============================================
248            // BitwiseXor -> # operator in PostgreSQL
249            Expression::BitwiseXor(f) => {
250                // Use a special marker that generator will recognize
251                Ok(Expression::Function(Box::new(Function::new(
252                    "__PG_BITWISE_XOR__".to_string(),
253                    vec![f.left, f.right],
254                ))))
255            }
256
257            // BitwiseAndAgg -> BIT_AND
258            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
259                "BIT_AND".to_string(),
260                vec![f.this],
261            )))),
262
263            // BitwiseOrAgg -> BIT_OR
264            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
265                "BIT_OR".to_string(),
266                vec![f.this],
267            )))),
268
269            // BitwiseXorAgg -> BIT_XOR
270            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
271                "BIT_XOR".to_string(),
272                vec![f.this],
273            )))),
274
275            // ============================================
276            // BOOLEAN AGGREGATES
277            // ============================================
278            // LogicalAnd -> BOOL_AND
279            Expression::LogicalAnd(f) => {
280                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
281                    name: "BOOL_AND".to_string(),
282                    args: vec![f.this],
283                    distinct: f.distinct,
284                    filter: f.filter,
285                    order_by: f.order_by,
286                    limit: f.limit,
287                    ignore_nulls: f.ignore_nulls,
288                    inferred_type: f.inferred_type,
289                })))
290            }
291
292            // LogicalOr -> BOOL_OR
293            Expression::LogicalOr(f) => {
294                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
295                    name: "BOOL_OR".to_string(),
296                    args: vec![f.this],
297                    distinct: f.distinct,
298                    filter: f.filter,
299                    order_by: f.order_by,
300                    limit: f.limit,
301                    ignore_nulls: f.ignore_nulls,
302                    inferred_type: f.inferred_type,
303                })))
304            }
305
306            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
307            Expression::Xor(f) => {
308                if let (Some(a), Some(b)) = (f.this, f.expression) {
309                    Ok(Expression::Neq(Box::new(BinaryOp {
310                        left: *a,
311                        right: *b,
312                        left_comments: Vec::new(),
313                        operator_comments: Vec::new(),
314                        trailing_comments: Vec::new(),
315                        inferred_type: None,
316                    })))
317                } else {
318                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
319                }
320            }
321
322            // ============================================
323            // ARRAY OPERATORS
324            // ============================================
325            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
326            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
327            Expression::ArrayContainedBy(op) => {
328                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
329                    left: op.right,
330                    right: op.left,
331                    left_comments: Vec::new(),
332                    operator_comments: Vec::new(),
333                    trailing_comments: Vec::new(),
334                    inferred_type: None,
335                })))
336            }
337
338            // ============================================
339            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
340            // ============================================
341            // RegexpLike -> keep as-is, generator handles ~ operator output
342            Expression::RegexpLike(f) => {
343                // Generator will output as: expr ~ pattern
344                Ok(Expression::RegexpLike(f))
345            }
346
347            // ============================================
348            // DATE/TIME FUNCTIONS
349            // ============================================
350            // DateAdd -> date + INTERVAL in PostgreSQL
351            Expression::DateAdd(f) => {
352                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
353                let right_expr = if is_literal {
354                    // Literal value: INTERVAL 'value' unit
355                    Expression::Interval(Box::new(Interval {
356                        this: Some(f.interval),
357                        unit: Some(IntervalUnitSpec::Simple {
358                            unit: f.unit,
359                            use_plural: false,
360                        }),
361                    }))
362                } else {
363                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
364                    let unit_str = match f.unit {
365                        IntervalUnit::Year => "YEAR",
366                        IntervalUnit::Quarter => "QUARTER",
367                        IntervalUnit::Month => "MONTH",
368                        IntervalUnit::Week => "WEEK",
369                        IntervalUnit::Day => "DAY",
370                        IntervalUnit::Hour => "HOUR",
371                        IntervalUnit::Minute => "MINUTE",
372                        IntervalUnit::Second => "SECOND",
373                        IntervalUnit::Millisecond => "MILLISECOND",
374                        IntervalUnit::Microsecond => "MICROSECOND",
375                        IntervalUnit::Nanosecond => "NANOSECOND",
376                    };
377                    let interval_one = Expression::Interval(Box::new(Interval {
378                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
379                            "1 {unit_str}"
380                        ))))),
381                        unit: None,
382                    }));
383                    Expression::Mul(Box::new(BinaryOp {
384                        left: interval_one,
385                        right: f.interval,
386                        left_comments: Vec::new(),
387                        operator_comments: Vec::new(),
388                        trailing_comments: Vec::new(),
389                        inferred_type: None,
390                    }))
391                };
392                Ok(Expression::Add(Box::new(BinaryOp {
393                    left: f.this,
394                    right: right_expr,
395                    left_comments: Vec::new(),
396                    operator_comments: Vec::new(),
397                    trailing_comments: Vec::new(),
398                    inferred_type: None,
399                })))
400            }
401
402            // DateSub -> date - INTERVAL in PostgreSQL
403            Expression::DateSub(f) => {
404                let interval_expr = Expression::Interval(Box::new(Interval {
405                    this: Some(f.interval),
406                    unit: Some(IntervalUnitSpec::Simple {
407                        unit: f.unit,
408                        use_plural: false,
409                    }),
410                }));
411                Ok(Expression::Sub(Box::new(BinaryOp {
412                    left: f.this,
413                    right: interval_expr,
414                    left_comments: Vec::new(),
415                    operator_comments: Vec::new(),
416                    trailing_comments: Vec::new(),
417                    inferred_type: None,
418                })))
419            }
420
421            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
422            Expression::DateDiff(f) => {
423                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
424                // matching the 3-arg string-based DATEDIFF handler below
425                let unit = f.unit.unwrap_or(IntervalUnit::Day);
426
427                // Helper: CAST(expr AS TIMESTAMP)
428                let cast_ts = |e: Expression| -> Expression {
429                    Expression::Cast(Box::new(Cast {
430                        this: e,
431                        to: DataType::Timestamp {
432                            precision: None,
433                            timezone: false,
434                        },
435                        trailing_comments: Vec::new(),
436                        double_colon_syntax: false,
437                        format: None,
438                        default: None,
439                        inferred_type: None,
440                    }))
441                };
442
443                // Helper: CAST(expr AS BIGINT)
444                let cast_bigint = |e: Expression| -> Expression {
445                    Expression::Cast(Box::new(Cast {
446                        this: e,
447                        to: DataType::BigInt { length: None },
448                        trailing_comments: Vec::new(),
449                        double_colon_syntax: false,
450                        format: None,
451                        default: None,
452                        inferred_type: None,
453                    }))
454                };
455
456                // Clone end/start for reuse
457                let end_expr = f.this;
458                let start = f.expression;
459
460                // Helper: end_ts - start_ts
461                let ts_diff = || -> Expression {
462                    Expression::Sub(Box::new(BinaryOp::new(
463                        cast_ts(end_expr.clone()),
464                        cast_ts(start.clone()),
465                    )))
466                };
467
468                // Helper: AGE(end_ts, start_ts)
469                let age_call = || -> Expression {
470                    Expression::Function(Box::new(Function::new(
471                        "AGE".to_string(),
472                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
473                    )))
474                };
475
476                // Helper: EXTRACT(field FROM expr)
477                let extract = |field: DateTimeField, from: Expression| -> Expression {
478                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
479                };
480
481                // Helper: number literal
482                let num = |n: i64| -> Expression {
483                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
484                };
485
486                let epoch_field = DateTimeField::Custom("epoch".to_string());
487
488                let result = match unit {
489                    IntervalUnit::Nanosecond => {
490                        let epoch = extract(epoch_field.clone(), ts_diff());
491                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
492                            epoch,
493                            num(1000000000),
494                        ))))
495                    }
496                    IntervalUnit::Microsecond => {
497                        let epoch = extract(epoch_field, ts_diff());
498                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
499                            epoch,
500                            num(1000000),
501                        ))))
502                    }
503                    IntervalUnit::Millisecond => {
504                        let epoch = extract(epoch_field, ts_diff());
505                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
506                    }
507                    IntervalUnit::Second => {
508                        let epoch = extract(epoch_field, ts_diff());
509                        cast_bigint(epoch)
510                    }
511                    IntervalUnit::Minute => {
512                        let epoch = extract(epoch_field, ts_diff());
513                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
514                    }
515                    IntervalUnit::Hour => {
516                        let epoch = extract(epoch_field, ts_diff());
517                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
518                    }
519                    IntervalUnit::Day => {
520                        let epoch = extract(epoch_field, ts_diff());
521                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
522                    }
523                    IntervalUnit::Week => {
524                        let diff_parens = Expression::Paren(Box::new(Paren {
525                            this: ts_diff(),
526                            trailing_comments: Vec::new(),
527                        }));
528                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
529                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
530                    }
531                    IntervalUnit::Month => {
532                        let year_part =
533                            extract(DateTimeField::Custom("year".to_string()), age_call());
534                        let month_part =
535                            extract(DateTimeField::Custom("month".to_string()), age_call());
536                        let year_months =
537                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
538                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
539                            year_months,
540                            month_part,
541                        ))))
542                    }
543                    IntervalUnit::Quarter => {
544                        let year_part =
545                            extract(DateTimeField::Custom("year".to_string()), age_call());
546                        let month_part =
547                            extract(DateTimeField::Custom("month".to_string()), age_call());
548                        let year_quarters =
549                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
550                        let month_quarters =
551                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
552                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
553                            year_quarters,
554                            month_quarters,
555                        ))))
556                    }
557                    IntervalUnit::Year => cast_bigint(extract(
558                        DateTimeField::Custom("year".to_string()),
559                        age_call(),
560                    )),
561                };
562                Ok(result)
563            }
564
565            // UnixToTime -> TO_TIMESTAMP
566            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
567                "TO_TIMESTAMP".to_string(),
568                vec![*f.this],
569            )))),
570
571            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
572            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
573                "DATE_PART".to_string(),
574                vec![Expression::string("epoch"), f.this],
575            )))),
576
577            // StrToTime -> TO_TIMESTAMP in PostgreSQL
578            Expression::ToTimestamp(f) => {
579                let mut args = vec![f.this];
580                if let Some(fmt) = f.format {
581                    args.push(fmt);
582                }
583                Ok(Expression::Function(Box::new(Function::new(
584                    "TO_TIMESTAMP".to_string(),
585                    args,
586                ))))
587            }
588
589            // StrToDate -> TO_DATE in PostgreSQL
590            Expression::ToDate(f) => {
591                let mut args = vec![f.this];
592                if let Some(fmt) = f.format {
593                    args.push(fmt);
594                }
595                Ok(Expression::Function(Box::new(Function::new(
596                    "TO_DATE".to_string(),
597                    args,
598                ))))
599            }
600
601            // TimestampTrunc -> DATE_TRUNC
602            Expression::TimestampTrunc(f) => {
603                // Convert DateTimeField to string expression for DATE_TRUNC
604                let unit_str = format!("{:?}", f.unit).to_lowercase();
605                let args = vec![Expression::string(&unit_str), f.this];
606                Ok(Expression::Function(Box::new(Function::new(
607                    "DATE_TRUNC".to_string(),
608                    args,
609                ))))
610            }
611
612            // TimeFromParts -> MAKE_TIME
613            Expression::TimeFromParts(f) => {
614                let mut args = Vec::new();
615                if let Some(h) = f.hour {
616                    args.push(*h);
617                }
618                if let Some(m) = f.min {
619                    args.push(*m);
620                }
621                if let Some(s) = f.sec {
622                    args.push(*s);
623                }
624                Ok(Expression::Function(Box::new(Function::new(
625                    "MAKE_TIME".to_string(),
626                    args,
627                ))))
628            }
629
630            // TimestampFromParts -> MAKE_TIMESTAMP
631            Expression::MakeTimestamp(f) => {
632                // MakeTimestampFunc has direct Expression fields, not Options
633                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
634                Ok(Expression::Function(Box::new(Function::new(
635                    "MAKE_TIMESTAMP".to_string(),
636                    args,
637                ))))
638            }
639
640            // ============================================
641            // STRING FUNCTIONS
642            // ============================================
643            // StringAgg is native to PostgreSQL - keep as-is
644            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
645
646            // GroupConcat -> STRING_AGG in PostgreSQL
647            Expression::GroupConcat(f) => {
648                let mut args = vec![f.this.clone()];
649                if let Some(sep) = f.separator.clone() {
650                    args.push(sep);
651                } else {
652                    args.push(Expression::string(","));
653                }
654                Ok(Expression::Function(Box::new(Function::new(
655                    "STRING_AGG".to_string(),
656                    args,
657                ))))
658            }
659
660            // StrPosition -> POSITION function
661            Expression::Position(f) => {
662                // PostgreSQL: POSITION(substring IN string)
663                // Keep as Position, generator handles it
664                Ok(Expression::Position(f))
665            }
666
667            // ============================================
668            // AGGREGATE FUNCTIONS
669            // ============================================
670            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
671            Expression::CountIf(f) => {
672                let case_expr = Expression::Case(Box::new(Case {
673                    operand: None,
674                    whens: vec![(f.this.clone(), Expression::number(1))],
675                    else_: Some(Expression::number(0)),
676                    comments: Vec::new(),
677                    inferred_type: None,
678                }));
679                Ok(Expression::Sum(Box::new(AggFunc {
680                    ignore_nulls: None,
681                    having_max: None,
682                    this: case_expr,
683                    distinct: f.distinct,
684                    filter: f.filter,
685                    order_by: Vec::new(),
686                    name: None,
687                    limit: None,
688                    inferred_type: None,
689                })))
690            }
691
692            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
693            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
694
695            // Variance -> VAR_SAMP in PostgreSQL
696            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
697                "VAR_SAMP".to_string(),
698                vec![f.this],
699            )))),
700
701            // VarPop -> VAR_POP in PostgreSQL
702            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
703                "VAR_POP".to_string(),
704                vec![f.this],
705            )))),
706
707            // ============================================
708            // JSON FUNCTIONS
709            // ============================================
710            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
711            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
712            Expression::JsonExtract(mut f) => {
713                // Only use arrow syntax for simple literal paths (string or non-negative number)
714                // Complex expressions like column references should use function form
715                f.arrow_syntax = Self::is_simple_json_path(&f.path);
716                Ok(Expression::JsonExtract(f))
717            }
718
719            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
720            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
721            // #>> (hash_arrow_syntax) stays as #>>
722            Expression::JsonExtractScalar(mut f) => {
723                if !f.hash_arrow_syntax {
724                    // Only use arrow syntax for simple literal paths (string or non-negative number)
725                    // Complex expressions like Neg(-1) should use function form
726                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
727                }
728                Ok(Expression::JsonExtractScalar(f))
729            }
730
731            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
732
733            // JSONObjectAgg -> JSON_OBJECT_AGG
734            Expression::JsonObjectAgg(f) => {
735                // JsonObjectAggFunc has key and value as Expression, not Option
736                let args = vec![f.key, f.value];
737                Ok(Expression::Function(Box::new(Function::new(
738                    "JSON_OBJECT_AGG".to_string(),
739                    args,
740                ))))
741            }
742
743            // JSONArrayAgg -> JSON_AGG
744            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
745                "JSON_AGG".to_string(),
746                vec![f.this],
747            )))),
748
749            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
750            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
751                String::new(),
752            )))),
753
754            // ============================================
755            // MISC FUNCTIONS
756            // ============================================
757            // IntDiv -> DIV in PostgreSQL
758            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
759                "DIV".to_string(),
760                vec![f.this, f.expression],
761            )))),
762
763            // Unicode -> ASCII in PostgreSQL
764            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
765                "ASCII".to_string(),
766                vec![f.this],
767            )))),
768
769            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
770            Expression::LastDay(f) => {
771                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
772                let truncated = Expression::Function(Box::new(Function::new(
773                    "DATE_TRUNC".to_string(),
774                    vec![Expression::string("month"), f.this.clone()],
775                )));
776                let plus_month = Expression::Add(Box::new(BinaryOp {
777                    left: truncated,
778                    right: Expression::Interval(Box::new(Interval {
779                        this: Some(Expression::string("1")),
780                        unit: Some(IntervalUnitSpec::Simple {
781                            unit: IntervalUnit::Month,
782                            use_plural: false,
783                        }),
784                    })),
785                    left_comments: Vec::new(),
786                    operator_comments: Vec::new(),
787                    trailing_comments: Vec::new(),
788                    inferred_type: None,
789                }));
790                let minus_day = Expression::Sub(Box::new(BinaryOp {
791                    left: plus_month,
792                    right: Expression::Interval(Box::new(Interval {
793                        this: Some(Expression::string("1")),
794                        unit: Some(IntervalUnitSpec::Simple {
795                            unit: IntervalUnit::Day,
796                            use_plural: false,
797                        }),
798                    })),
799                    left_comments: Vec::new(),
800                    operator_comments: Vec::new(),
801                    trailing_comments: Vec::new(),
802                    inferred_type: None,
803                }));
804                Ok(Expression::Cast(Box::new(Cast {
805                    this: minus_day,
806                    to: DataType::Date,
807                    trailing_comments: Vec::new(),
808                    double_colon_syntax: true, // Use PostgreSQL :: syntax
809                    format: None,
810                    default: None,
811                    inferred_type: None,
812                })))
813            }
814
815            // GenerateSeries is native to PostgreSQL
816            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
817
818            // ExplodingGenerateSeries -> GENERATE_SERIES
819            Expression::ExplodingGenerateSeries(f) => {
820                let mut args = vec![f.start, f.stop];
821                if let Some(step) = f.step {
822                    args.push(step); // step is Expression, not Box<Expression>
823                }
824                Ok(Expression::Function(Box::new(Function::new(
825                    "GENERATE_SERIES".to_string(),
826                    args,
827                ))))
828            }
829
830            // ============================================
831            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
832            // ============================================
833            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
834            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
835                name: "CURRENT_TIMESTAMP".to_string(),
836                args: vec![],
837                distinct: false,
838                trailing_comments: vec![],
839                use_bracket_syntax: false,
840                no_parens: true,
841                quoted: false,
842                span: None,
843                inferred_type: None,
844            }))),
845
846            // CurrentUser -> CURRENT_USER (no parens)
847            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
848                "CURRENT_USER".to_string(),
849                vec![],
850            )))),
851
852            // CurrentDate -> CURRENT_DATE (no parens)
853            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
854                name: "CURRENT_DATE".to_string(),
855                args: vec![],
856                distinct: false,
857                trailing_comments: vec![],
858                use_bracket_syntax: false,
859                no_parens: true,
860                quoted: false,
861                span: None,
862                inferred_type: None,
863            }))),
864
865            // ============================================
866            // JOIN TRANSFORMATIONS
867            // ============================================
868            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
869            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
870                Ok(Expression::Join(Box::new(Join {
871                    this: join.this,
872                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
873                    using: join.using,
874                    kind: JoinKind::CrossApply,
875                    use_inner_keyword: false,
876                    use_outer_keyword: false,
877                    deferred_condition: false,
878                    join_hint: None,
879                    match_condition: None,
880                    pivots: join.pivots,
881                    comments: join.comments,
882                    nesting_group: 0,
883                    directed: false,
884                })))
885            }
886
887            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
888            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
889                Ok(Expression::Join(Box::new(Join {
890                    this: join.this,
891                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
892                    using: join.using,
893                    kind: JoinKind::OuterApply,
894                    use_inner_keyword: false,
895                    use_outer_keyword: false,
896                    deferred_condition: false,
897                    join_hint: None,
898                    match_condition: None,
899                    pivots: join.pivots,
900                    comments: join.comments,
901                    nesting_group: 0,
902                    directed: false,
903                })))
904            }
905
906            // ============================================
907            // GENERIC FUNCTION TRANSFORMATIONS
908            // ============================================
909            Expression::Function(f) => self.transform_function(*f),
910
911            // Generic aggregate function transformations
912            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
913
914            // ===== Context-aware JSON arrow wrapping =====
915            // When JSON arrow expressions appear in Binary/In/Not contexts,
916            // they need to be wrapped in parentheses for correct precedence.
917            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
918
919            // Binary operators that need JSON wrapping
920            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
921                left: wrap_if_json_arrow(op.left),
922                right: wrap_if_json_arrow(op.right),
923                ..*op
924            }))),
925            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
926                left: wrap_if_json_arrow(op.left),
927                right: wrap_if_json_arrow(op.right),
928                ..*op
929            }))),
930            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
931                left: wrap_if_json_arrow(op.left),
932                right: wrap_if_json_arrow(op.right),
933                ..*op
934            }))),
935            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
936                left: wrap_if_json_arrow(op.left),
937                right: wrap_if_json_arrow(op.right),
938                ..*op
939            }))),
940            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
941                left: wrap_if_json_arrow(op.left),
942                right: wrap_if_json_arrow(op.right),
943                ..*op
944            }))),
945            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
946                left: wrap_if_json_arrow(op.left),
947                right: wrap_if_json_arrow(op.right),
948                ..*op
949            }))),
950
951            // In expression - wrap the this part if it's JSON arrow
952            Expression::In(mut i) => {
953                i.this = wrap_if_json_arrow(i.this);
954                Ok(Expression::In(i))
955            }
956
957            // Not expression - wrap the this part if it's JSON arrow
958            Expression::Not(mut n) => {
959                n.this = wrap_if_json_arrow(n.this);
960                Ok(Expression::Not(n))
961            }
962
963            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
964            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
965            Expression::Merge(m) => Ok(Expression::Merge(m)),
966
967            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
968            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
969                // Convert path from bracketed format to simple key
970                // e.g., '["fr''uit"]' -> 'fr''uit'
971                let path = match *je.expression {
972                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
973                        let Literal::String(s) = lit.as_ref() else {
974                            unreachable!()
975                        };
976                        // Strip bracketed JSON path format: ["key"] -> key
977                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
978                            s[2..s.len() - 2].to_string()
979                        } else {
980                            s.clone()
981                        };
982                        Expression::Literal(Box::new(Literal::String(cleaned)))
983                    }
984                    other => other,
985                };
986                Ok(Expression::Function(Box::new(Function::new(
987                    "JSON_EXTRACT_PATH".to_string(),
988                    vec![*je.this, path],
989                ))))
990            }
991
992            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
993            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
994                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
995                    this: t.this,
996                    characters: t.characters,
997                    position: t.position,
998                    sql_standard_syntax: true,
999                    position_explicit: t.position_explicit,
1000                })))
1001            }
1002
1003            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
1004            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
1005                let Literal::ByteString(s) = lit.as_ref() else {
1006                    unreachable!()
1007                };
1008                Ok(Expression::Cast(Box::new(Cast {
1009                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
1010                    to: DataType::VarBinary { length: None },
1011                    trailing_comments: Vec::new(),
1012                    double_colon_syntax: false,
1013                    format: None,
1014                    default: None,
1015                    inferred_type: None,
1016                })))
1017            }
1018
1019            // Pass through everything else
1020            _ => Ok(expr),
1021        }
1022    }
1023}
1024
1025#[cfg(feature = "transpile")]
1026impl PostgresDialect {
1027    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
1028    /// Simple paths can use arrow syntax (->>) in PostgreSQL
1029    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
1030    fn is_simple_json_path(path: &Expression) -> bool {
1031        match path {
1032            // String literals are always simple
1033            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
1034            // Non-negative integer literals are simple
1035            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1036                let Literal::Number(n) = lit.as_ref() else {
1037                    unreachable!()
1038                };
1039                // Check if it's non-negative
1040                !n.starts_with('-')
1041            }
1042            // JSONPath expressions are simple (they're already parsed paths)
1043            Expression::JSONPath(_) => true,
1044            // Everything else (Neg, function calls, etc.) is complex
1045            _ => false,
1046        }
1047    }
1048
1049    /// Transform data types according to PostgreSQL TYPE_MAPPING
1050    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1051        let transformed = match dt {
1052            // TINYINT -> SMALLINT
1053            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1054
1055            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1056            DataType::Float { .. } => DataType::Custom {
1057                name: "DOUBLE PRECISION".to_string(),
1058            },
1059
1060            // DOUBLE -> DOUBLE PRECISION
1061            DataType::Double { .. } => DataType::Custom {
1062                name: "DOUBLE PRECISION".to_string(),
1063            },
1064
1065            // BINARY -> BYTEA (handled by generator which preserves length)
1066            DataType::Binary { .. } => dt,
1067
1068            // VARBINARY -> BYTEA (handled by generator which preserves length)
1069            DataType::VarBinary { .. } => dt,
1070
1071            // BLOB -> BYTEA
1072            DataType::Blob => DataType::Custom {
1073                name: "BYTEA".to_string(),
1074            },
1075
1076            // Custom type normalizations
1077            DataType::Custom { ref name } => {
1078                let upper = name.to_uppercase();
1079                match upper.as_str() {
1080                    // INT8 -> BIGINT (PostgreSQL alias)
1081                    "INT8" => DataType::BigInt { length: None },
1082                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1083                    "FLOAT8" => DataType::Custom {
1084                        name: "DOUBLE PRECISION".to_string(),
1085                    },
1086                    // FLOAT4 -> REAL (PostgreSQL alias)
1087                    "FLOAT4" => DataType::Custom {
1088                        name: "REAL".to_string(),
1089                    },
1090                    // INT4 -> INTEGER (PostgreSQL alias)
1091                    "INT4" => DataType::Int {
1092                        length: None,
1093                        integer_spelling: false,
1094                    },
1095                    // INT2 -> SMALLINT (PostgreSQL alias)
1096                    "INT2" => DataType::SmallInt { length: None },
1097                    _ => dt,
1098                }
1099            }
1100
1101            // Keep all other types as-is
1102            other => other,
1103        };
1104        Ok(Expression::DataType(transformed))
1105    }
1106
1107    fn transform_function(&self, f: Function) -> Result<Expression> {
1108        let name_upper = f.name.to_uppercase();
1109        match name_upper.as_str() {
1110            // IFNULL -> COALESCE
1111            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1112                original_name: None,
1113                expressions: f.args,
1114                inferred_type: None,
1115            }))),
1116
1117            // NVL -> COALESCE
1118            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1119                original_name: None,
1120                expressions: f.args,
1121                inferred_type: None,
1122            }))),
1123
1124            // ISNULL (SQL Server) -> COALESCE
1125            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1126                original_name: None,
1127                expressions: f.args,
1128                inferred_type: None,
1129            }))),
1130
1131            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1132            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1133                Function::new("STRING_AGG".to_string(), f.args),
1134            ))),
1135
1136            // SUBSTR -> SUBSTRING (standard SQL)
1137            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1138                "SUBSTRING".to_string(),
1139                f.args,
1140            )))),
1141
1142            // RAND -> RANDOM in PostgreSQL
1143            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1144
1145            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1146            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1147                this: f.args.into_iter().next().unwrap(),
1148                decimals: None,
1149                to: None,
1150            }))),
1151
1152            // LEN -> LENGTH in PostgreSQL
1153            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1154                this: f.args.into_iter().next().unwrap(),
1155                original_name: None,
1156                inferred_type: None,
1157            }))),
1158
1159            // CHAR_LENGTH -> LENGTH in PostgreSQL
1160            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1161                this: f.args.into_iter().next().unwrap(),
1162                original_name: None,
1163                inferred_type: None,
1164            }))),
1165
1166            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1167            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1168                Ok(Expression::Length(Box::new(UnaryFunc {
1169                    this: f.args.into_iter().next().unwrap(),
1170                    original_name: None,
1171                    inferred_type: None,
1172                })))
1173            }
1174
1175            // CHARINDEX -> POSITION in PostgreSQL
1176            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1177            "CHARINDEX" if f.args.len() >= 2 => {
1178                let mut args = f.args;
1179                let substring = args.remove(0);
1180                let string = args.remove(0);
1181                Ok(Expression::Position(Box::new(
1182                    crate::expressions::PositionFunc {
1183                        substring,
1184                        string,
1185                        start: args.pop(),
1186                    },
1187                )))
1188            }
1189
1190            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1191            "GETDATE" => Ok(Expression::CurrentTimestamp(
1192                crate::expressions::CurrentTimestamp {
1193                    precision: None,
1194                    sysdate: false,
1195                },
1196            )),
1197
1198            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1199            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1200                crate::expressions::CurrentTimestamp {
1201                    precision: None,
1202                    sysdate: false,
1203                },
1204            )),
1205
1206            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1207            "NOW" => Ok(Expression::CurrentTimestamp(
1208                crate::expressions::CurrentTimestamp {
1209                    precision: None,
1210                    sysdate: false,
1211                },
1212            )),
1213
1214            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1215            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1216                "GEN_RANDOM_UUID".to_string(),
1217                vec![],
1218            )))),
1219
1220            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1221            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1222                "GEN_RANDOM_UUID".to_string(),
1223                vec![],
1224            )))),
1225
1226            // UNNEST is native to PostgreSQL
1227            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1228
1229            // GENERATE_SERIES is native to PostgreSQL
1230            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1231
1232            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1233            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1234                "SHA256".to_string(),
1235                f.args,
1236            )))),
1237
1238            // SHA2 -> SHA256/SHA512 based on length argument
1239            "SHA2" if f.args.len() == 2 => {
1240                // SHA2(data, length) -> SHA256/SHA384/SHA512
1241                let args = f.args;
1242                let data = args[0].clone();
1243                // Default to SHA256 - would need runtime inspection for exact mapping
1244                Ok(Expression::Function(Box::new(Function::new(
1245                    "SHA256".to_string(),
1246                    vec![data],
1247                ))))
1248            }
1249
1250            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1251            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1252
1253            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1254            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1255                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1256            ))),
1257            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1258                Function::new("LEVENSHTEIN".to_string(), f.args),
1259            ))),
1260
1261            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1262            "TRIM" if f.args.len() == 2 => {
1263                let value = f.args[0].clone();
1264                let chars = f.args[1].clone();
1265                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1266                    this: value,
1267                    characters: Some(chars),
1268                    position: crate::expressions::TrimPosition::Both,
1269                    sql_standard_syntax: true,
1270                    position_explicit: false,
1271                })))
1272            }
1273
1274            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1275            "DATEDIFF" if f.args.len() >= 2 => {
1276                let mut args = f.args;
1277                if args.len() == 2 {
1278                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1279                    let first = args.remove(0);
1280                    let second = args.remove(0);
1281                    Ok(Expression::Function(Box::new(Function::new(
1282                        "AGE".to_string(),
1283                        vec![first, second],
1284                    ))))
1285                } else {
1286                    // 3-arg form: DATEDIFF(unit, start, end)
1287                    let unit_expr = args.remove(0);
1288                    let start = args.remove(0);
1289                    let end_expr = args.remove(0);
1290
1291                    // Extract unit name from identifier or column
1292                    let unit_name = match &unit_expr {
1293                        Expression::Identifier(id) => id.name.to_uppercase(),
1294                        Expression::Var(v) => v.this.to_uppercase(),
1295                        Expression::Column(col) if col.table.is_none() => {
1296                            col.name.name.to_uppercase()
1297                        }
1298                        _ => "DAY".to_string(),
1299                    };
1300
1301                    // Helper: CAST(expr AS TIMESTAMP)
1302                    let cast_ts = |e: Expression| -> Expression {
1303                        Expression::Cast(Box::new(Cast {
1304                            this: e,
1305                            to: DataType::Timestamp {
1306                                precision: None,
1307                                timezone: false,
1308                            },
1309                            trailing_comments: Vec::new(),
1310                            double_colon_syntax: false,
1311                            format: None,
1312                            default: None,
1313                            inferred_type: None,
1314                        }))
1315                    };
1316
1317                    // Helper: CAST(expr AS BIGINT)
1318                    let cast_bigint = |e: Expression| -> Expression {
1319                        Expression::Cast(Box::new(Cast {
1320                            this: e,
1321                            to: DataType::BigInt { length: None },
1322                            trailing_comments: Vec::new(),
1323                            double_colon_syntax: false,
1324                            format: None,
1325                            default: None,
1326                            inferred_type: None,
1327                        }))
1328                    };
1329
1330                    let end_ts = cast_ts(end_expr.clone());
1331                    let start_ts = cast_ts(start.clone());
1332
1333                    // Helper: end_ts - start_ts
1334                    let ts_diff = || -> Expression {
1335                        Expression::Sub(Box::new(BinaryOp::new(
1336                            cast_ts(end_expr.clone()),
1337                            cast_ts(start.clone()),
1338                        )))
1339                    };
1340
1341                    // Helper: AGE(end_ts, start_ts)
1342                    let age_call = || -> Expression {
1343                        Expression::Function(Box::new(Function::new(
1344                            "AGE".to_string(),
1345                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1346                        )))
1347                    };
1348
1349                    // Helper: EXTRACT(field FROM expr)
1350                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1351                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1352                    };
1353
1354                    // Helper: number literal
1355                    let num = |n: i64| -> Expression {
1356                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1357                    };
1358
1359                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1360                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1361
1362                    let result = match unit_name.as_str() {
1363                        "MICROSECOND" => {
1364                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1365                            let epoch = extract(epoch_field, ts_diff());
1366                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1367                                epoch,
1368                                num(1000000),
1369                            ))))
1370                        }
1371                        "MILLISECOND" => {
1372                            let epoch = extract(epoch_field, ts_diff());
1373                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1374                        }
1375                        "SECOND" => {
1376                            let epoch = extract(epoch_field, ts_diff());
1377                            cast_bigint(epoch)
1378                        }
1379                        "MINUTE" => {
1380                            let epoch = extract(epoch_field, ts_diff());
1381                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1382                        }
1383                        "HOUR" => {
1384                            let epoch = extract(epoch_field, ts_diff());
1385                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1386                        }
1387                        "DAY" => {
1388                            let epoch = extract(epoch_field, ts_diff());
1389                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1390                        }
1391                        "WEEK" => {
1392                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1393                            let diff_parens = Expression::Paren(Box::new(Paren {
1394                                this: ts_diff(),
1395                                trailing_comments: Vec::new(),
1396                            }));
1397                            let days =
1398                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1399                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1400                        }
1401                        "MONTH" => {
1402                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1403                            let year_part =
1404                                extract(DateTimeField::Custom("year".to_string()), age_call());
1405                            let month_part =
1406                                extract(DateTimeField::Custom("month".to_string()), age_call());
1407                            let year_months =
1408                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1409                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1410                                year_months,
1411                                month_part,
1412                            ))))
1413                        }
1414                        "QUARTER" => {
1415                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1416                            let year_part =
1417                                extract(DateTimeField::Custom("year".to_string()), age_call());
1418                            let month_part =
1419                                extract(DateTimeField::Custom("month".to_string()), age_call());
1420                            let year_quarters =
1421                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1422                            let month_quarters =
1423                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1424                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1425                                year_quarters,
1426                                month_quarters,
1427                            ))))
1428                        }
1429                        "YEAR" => {
1430                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1431                            cast_bigint(extract(
1432                                DateTimeField::Custom("year".to_string()),
1433                                age_call(),
1434                            ))
1435                        }
1436                        _ => {
1437                            // Fallback: simple AGE
1438                            Expression::Function(Box::new(Function::new(
1439                                "AGE".to_string(),
1440                                vec![end_ts, start_ts],
1441                            )))
1442                        }
1443                    };
1444                    Ok(result)
1445                }
1446            }
1447
1448            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1449            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1450                let mut args = f.args;
1451                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1452                let start = args.remove(0);
1453                let end = args.remove(0);
1454                Ok(Expression::Function(Box::new(Function::new(
1455                    "AGE".to_string(),
1456                    vec![end, start],
1457                ))))
1458            }
1459
1460            // FROM_UNIXTIME -> TO_TIMESTAMP
1461            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1462                "TO_TIMESTAMP".to_string(),
1463                f.args,
1464            )))),
1465
1466            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1467            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1468                let arg = f.args.into_iter().next().unwrap();
1469                Ok(Expression::Function(Box::new(Function::new(
1470                    "DATE_PART".to_string(),
1471                    vec![Expression::string("epoch"), arg],
1472                ))))
1473            }
1474
1475            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1476            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1477                Ok(Expression::Function(Box::new(Function::new(
1478                    "DATE_PART".to_string(),
1479                    vec![
1480                        Expression::string("epoch"),
1481                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1482                            precision: None,
1483                            sysdate: false,
1484                        }),
1485                    ],
1486                ))))
1487            }
1488
1489            // DATEADD -> date + interval pattern
1490            "DATEADD" if f.args.len() == 3 => {
1491                // DATEADD(unit, count, date) -> date + interval 'count unit'
1492                // This is a simplified version - full impl would construct proper interval
1493                let mut args = f.args;
1494                let _unit = args.remove(0);
1495                let count = args.remove(0);
1496                let date = args.remove(0);
1497                Ok(Expression::Add(Box::new(BinaryOp {
1498                    left: date,
1499                    right: count,
1500                    left_comments: Vec::new(),
1501                    operator_comments: Vec::new(),
1502                    trailing_comments: Vec::new(),
1503                    inferred_type: None,
1504                })))
1505            }
1506
1507            // INSTR -> POSITION (simplified)
1508            "INSTR" if f.args.len() >= 2 => {
1509                let mut args = f.args;
1510                let string = args.remove(0);
1511                let substring = args.remove(0);
1512                Ok(Expression::Position(Box::new(
1513                    crate::expressions::PositionFunc {
1514                        substring,
1515                        string,
1516                        start: args.pop(),
1517                    },
1518                )))
1519            }
1520
1521            // CONCAT_WS is native to PostgreSQL
1522            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1523
1524            // REGEXP_REPLACE: pass through without adding 'g' flag
1525            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1526            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1527                Ok(Expression::Function(Box::new(f)))
1528            }
1529            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1530            // If occurrence is 0 (global), append 'g' to flags
1531            "REGEXP_REPLACE" if f.args.len() == 6 => {
1532                let is_global = match &f.args[4] {
1533                    Expression::Literal(lit)
1534                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1535                    {
1536                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1537                            unreachable!()
1538                        };
1539                        n == "0"
1540                    }
1541                    _ => false,
1542                };
1543                if is_global {
1544                    let subject = f.args[0].clone();
1545                    let pattern = f.args[1].clone();
1546                    let replacement = f.args[2].clone();
1547                    let position = f.args[3].clone();
1548                    let occurrence = f.args[4].clone();
1549                    let params = &f.args[5];
1550                    let mut flags = if let Expression::Literal(lit) = params {
1551                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1552                            s.clone()
1553                        } else {
1554                            String::new()
1555                        }
1556                    } else {
1557                        String::new()
1558                    };
1559                    if !flags.contains('g') {
1560                        flags.push('g');
1561                    }
1562                    Ok(Expression::Function(Box::new(Function::new(
1563                        "REGEXP_REPLACE".to_string(),
1564                        vec![
1565                            subject,
1566                            pattern,
1567                            replacement,
1568                            position,
1569                            occurrence,
1570                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1571                                flags,
1572                            ))),
1573                        ],
1574                    ))))
1575                } else {
1576                    Ok(Expression::Function(Box::new(f)))
1577                }
1578            }
1579            // Default: pass through
1580            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1581
1582            // Pass through everything else
1583            _ => Ok(Expression::Function(Box::new(f))),
1584        }
1585    }
1586
1587    fn transform_aggregate_function(
1588        &self,
1589        f: Box<crate::expressions::AggregateFunction>,
1590    ) -> Result<Expression> {
1591        let name_upper = f.name.to_uppercase();
1592        match name_upper.as_str() {
1593            // COUNT_IF -> SUM(CASE WHEN...)
1594            "COUNT_IF" if !f.args.is_empty() => {
1595                let condition = f.args.into_iter().next().unwrap();
1596                let case_expr = Expression::Case(Box::new(Case {
1597                    operand: None,
1598                    whens: vec![(condition, Expression::number(1))],
1599                    else_: Some(Expression::number(0)),
1600                    comments: Vec::new(),
1601                    inferred_type: None,
1602                }));
1603                Ok(Expression::Sum(Box::new(AggFunc {
1604                    ignore_nulls: None,
1605                    having_max: None,
1606                    this: case_expr,
1607                    distinct: f.distinct,
1608                    filter: f.filter,
1609                    order_by: Vec::new(),
1610                    name: None,
1611                    limit: None,
1612                    inferred_type: None,
1613                })))
1614            }
1615
1616            // GROUP_CONCAT -> STRING_AGG
1617            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1618                Function::new("STRING_AGG".to_string(), f.args),
1619            ))),
1620
1621            // STDEV -> STDDEV in PostgreSQL
1622            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1623                ignore_nulls: None,
1624                having_max: None,
1625                this: f.args.into_iter().next().unwrap(),
1626                distinct: f.distinct,
1627                filter: f.filter,
1628                order_by: Vec::new(),
1629                name: None,
1630                limit: None,
1631                inferred_type: None,
1632            }))),
1633
1634            // STDEVP -> STDDEV_POP in PostgreSQL
1635            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1636                ignore_nulls: None,
1637                having_max: None,
1638                this: f.args.into_iter().next().unwrap(),
1639                distinct: f.distinct,
1640                filter: f.filter,
1641                order_by: Vec::new(),
1642                name: None,
1643                limit: None,
1644                inferred_type: None,
1645            }))),
1646
1647            // VAR -> VAR_SAMP in PostgreSQL
1648            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1649                ignore_nulls: None,
1650                having_max: None,
1651                this: f.args.into_iter().next().unwrap(),
1652                distinct: f.distinct,
1653                filter: f.filter,
1654                order_by: Vec::new(),
1655                name: None,
1656                limit: None,
1657                inferred_type: None,
1658            }))),
1659
1660            // VARP -> VAR_POP in PostgreSQL
1661            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1662                ignore_nulls: None,
1663                having_max: None,
1664                this: f.args.into_iter().next().unwrap(),
1665                distinct: f.distinct,
1666                filter: f.filter,
1667                order_by: Vec::new(),
1668                name: None,
1669                limit: None,
1670                inferred_type: None,
1671            }))),
1672
1673            // BIT_AND is native to PostgreSQL
1674            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1675
1676            // BIT_OR is native to PostgreSQL
1677            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1678
1679            // BIT_XOR is native to PostgreSQL
1680            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1681
1682            // BOOL_AND is native to PostgreSQL
1683            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1684
1685            // BOOL_OR is native to PostgreSQL
1686            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1687
1688            // VARIANCE -> VAR_SAMP in PostgreSQL
1689            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1690                ignore_nulls: None,
1691                having_max: None,
1692                this: f.args.into_iter().next().unwrap(),
1693                distinct: f.distinct,
1694                filter: f.filter,
1695                order_by: Vec::new(),
1696                name: None,
1697                limit: None,
1698                inferred_type: None,
1699            }))),
1700
1701            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1702            "LOGICAL_OR" if !f.args.is_empty() => {
1703                let mut new_agg = f.clone();
1704                new_agg.name = "BOOL_OR".to_string();
1705                Ok(Expression::AggregateFunction(new_agg))
1706            }
1707
1708            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1709            "LOGICAL_AND" if !f.args.is_empty() => {
1710                let mut new_agg = f.clone();
1711                new_agg.name = "BOOL_AND".to_string();
1712                Ok(Expression::AggregateFunction(new_agg))
1713            }
1714
1715            // Pass through everything else
1716            _ => Ok(Expression::AggregateFunction(f)),
1717        }
1718    }
1719}
1720
1721#[cfg(test)]
1722mod tests {
1723    use super::*;
1724    use crate::dialects::Dialect;
1725
1726    fn transpile_to_postgres(sql: &str) -> String {
1727        let dialect = Dialect::get(DialectType::Generic);
1728        let result = dialect
1729            .transpile(sql, DialectType::PostgreSQL)
1730            .expect("Transpile failed");
1731        result[0].clone()
1732    }
1733
1734    #[test]
1735    fn test_ifnull_to_coalesce() {
1736        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1737        assert!(
1738            result.contains("COALESCE"),
1739            "Expected COALESCE, got: {}",
1740            result
1741        );
1742    }
1743
1744    #[test]
1745    fn test_nvl_to_coalesce() {
1746        let result = transpile_to_postgres("SELECT NVL(a, b)");
1747        assert!(
1748            result.contains("COALESCE"),
1749            "Expected COALESCE, got: {}",
1750            result
1751        );
1752    }
1753
1754    #[test]
1755    fn test_rand_to_random() {
1756        let result = transpile_to_postgres("SELECT RAND()");
1757        assert!(
1758            result.contains("RANDOM"),
1759            "Expected RANDOM, got: {}",
1760            result
1761        );
1762    }
1763
1764    #[test]
1765    fn test_basic_select() {
1766        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1767        assert!(result.contains("SELECT"));
1768        assert!(result.contains("FROM users"));
1769    }
1770
1771    #[test]
1772    fn test_len_to_length() {
1773        let result = transpile_to_postgres("SELECT LEN(name)");
1774        assert!(
1775            result.contains("LENGTH"),
1776            "Expected LENGTH, got: {}",
1777            result
1778        );
1779    }
1780
1781    #[test]
1782    fn test_getdate_to_current_timestamp() {
1783        let result = transpile_to_postgres("SELECT GETDATE()");
1784        assert!(
1785            result.contains("CURRENT_TIMESTAMP"),
1786            "Expected CURRENT_TIMESTAMP, got: {}",
1787            result
1788        );
1789    }
1790
1791    #[test]
1792    fn test_substr_to_substring() {
1793        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1794        assert!(
1795            result.contains("SUBSTRING"),
1796            "Expected SUBSTRING, got: {}",
1797            result
1798        );
1799    }
1800
1801    #[test]
1802    fn test_group_concat_to_string_agg() {
1803        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1804        assert!(
1805            result.contains("STRING_AGG"),
1806            "Expected STRING_AGG, got: {}",
1807            result
1808        );
1809    }
1810
1811    #[test]
1812    fn test_double_quote_identifiers() {
1813        // PostgreSQL uses double quotes for identifiers
1814        let dialect = PostgresDialect;
1815        let config = dialect.generator_config();
1816        assert_eq!(config.identifier_quote, '"');
1817    }
1818
1819    #[test]
1820    fn test_char_length_to_length() {
1821        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1822        assert!(
1823            result.contains("LENGTH"),
1824            "Expected LENGTH, got: {}",
1825            result
1826        );
1827    }
1828
1829    #[test]
1830    fn test_character_length_to_length() {
1831        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1832        assert!(
1833            result.contains("LENGTH"),
1834            "Expected LENGTH, got: {}",
1835            result
1836        );
1837    }
1838
1839    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1840    fn identity_postgres(sql: &str) -> String {
1841        let dialect = Dialect::get(DialectType::PostgreSQL);
1842        let exprs = dialect.parse(sql).expect("Parse failed");
1843        let transformed = dialect
1844            .transform(exprs[0].clone())
1845            .expect("Transform failed");
1846        dialect.generate(&transformed).expect("Generate failed")
1847    }
1848
1849    #[test]
1850    fn test_json_extract_with_column_path() {
1851        // When the path is a column reference (not a literal), should use function form
1852        let result = identity_postgres("json_data.data -> field_ids.field_id");
1853        assert!(
1854            result.contains("JSON_EXTRACT_PATH"),
1855            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1856            result
1857        );
1858    }
1859
1860    #[test]
1861    fn test_json_extract_scalar_with_negative_index() {
1862        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1863        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1864        assert!(
1865            result.contains("JSON_EXTRACT_PATH_TEXT"),
1866            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1867            result
1868        );
1869        // The first -> should still be arrow syntax since 'duration' is a string literal
1870        assert!(
1871            result.contains("->"),
1872            "Expected -> for string literal path, got: {}",
1873            result
1874        );
1875    }
1876
1877    #[test]
1878    fn test_json_extract_with_string_literal() {
1879        // When the path is a string literal, should keep arrow syntax
1880        let result = identity_postgres("data -> 'key'");
1881        assert!(
1882            result.contains("->"),
1883            "Expected -> for string literal path, got: {}",
1884            result
1885        );
1886        assert!(
1887            !result.contains("JSON_EXTRACT_PATH"),
1888            "Should NOT use function form for string literal, got: {}",
1889            result
1890        );
1891    }
1892}