Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, AggregateFunction, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType,
22    DateTimeField, Expression, ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec,
23    Join, JoinKind, Literal, Paren, UnaryFunc, VarArgFunc,
24};
25#[cfg(feature = "generate")]
26use crate::generator::GeneratorConfig;
27use crate::tokens::TokenizerConfig;
28
29/// Helper to wrap JSON arrow expressions in parentheses when they appear
30/// in contexts that require it (Binary, In, Not expressions)
31/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
32fn wrap_if_json_arrow(expr: Expression) -> Expression {
33    match &expr {
34        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
35            this: expr,
36            trailing_comments: Vec::new(),
37        })),
38        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
39            this: expr,
40            trailing_comments: Vec::new(),
41        })),
42        _ => expr,
43    }
44}
45
46/// PostgreSQL dialect
47pub struct PostgresDialect;
48
49impl DialectImpl for PostgresDialect {
50    fn dialect_type(&self) -> DialectType {
51        DialectType::PostgreSQL
52    }
53
54    fn tokenizer_config(&self) -> TokenizerConfig {
55        use crate::tokens::TokenType;
56        let mut config = TokenizerConfig::default();
57        // PostgreSQL supports $$ string literals (heredoc strings)
58        config.quotes.insert("$$".to_string(), "$$".to_string());
59        // PostgreSQL uses double quotes for identifiers
60        config.identifiers.insert('"', '"');
61        // Nested comments supported
62        config.nested_comments = true;
63        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
64        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
65        config
66            .keywords
67            .insert("EXEC".to_string(), TokenType::Command);
68        for command in [
69            "BASE_BACKUP",
70            "CREATE_REPLICATION_SLOT",
71            "DROP_REPLICATION_SLOT",
72            "IDENTIFY_SYSTEM",
73            "READ_REPLICATION_SLOT",
74            "START_REPLICATION",
75            "TIMELINE_HISTORY",
76        ] {
77            config
78                .keywords
79                .insert(command.to_string(), TokenType::Command);
80        }
81        config
82    }
83
84    #[cfg(feature = "generate")]
85
86    fn generator_config(&self) -> GeneratorConfig {
87        use crate::generator::IdentifierQuoteStyle;
88        GeneratorConfig {
89            identifier_quote: '"',
90            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
91            dialect: Some(DialectType::PostgreSQL),
92            // PostgreSQL uses TIMESTAMPTZ shorthand
93            tz_to_with_time_zone: false,
94            // PostgreSQL prefers INTERVAL '1 day' syntax
95            single_string_interval: true,
96            // TABLESAMPLE uses REPEATABLE in PostgreSQL
97            tablesample_seed_keyword: "REPEATABLE",
98            // PostgreSQL doesn't support NVL2
99            nvl2_supported: false,
100            // PostgreSQL uses $ for parameters
101            parameter_token: "$",
102            // PostgreSQL uses % for named placeholders
103            named_placeholder_token: "%",
104            // PostgreSQL supports SELECT INTO
105            supports_select_into: true,
106            // PostgreSQL: USING btree(col) without space before parens
107            index_using_no_space: true,
108            // PostgreSQL supports UNLOGGED tables
109            supports_unlogged_tables: true,
110            // PostgreSQL doesn't support multi-arg DISTINCT
111            multi_arg_distinct: false,
112            // PostgreSQL uses ANY (subquery) with space
113            quantified_no_paren_space: false,
114            // PostgreSQL supports window EXCLUDE clause
115            supports_window_exclude: true,
116            // PostgreSQL normalizes single-bound window frames to BETWEEN form
117            normalize_window_frame_between: true,
118            // PostgreSQL COPY doesn't use INTO keyword
119            copy_has_into_keyword: false,
120            // PostgreSQL ARRAY_SIZE requires dimension argument
121            array_size_dim_required: Some(true),
122            // PostgreSQL supports BETWEEN flags
123            supports_between_flags: true,
124            // PostgreSQL doesn't support hints
125            join_hints: false,
126            table_hints: false,
127            query_hints: false,
128            // PostgreSQL supports locking reads
129            locking_reads_supported: true,
130            // PostgreSQL doesn't rename tables with DB
131            rename_table_with_db: false,
132            // PostgreSQL can implement array any
133            can_implement_array_any: true,
134            // PostgreSQL ARRAY_CONCAT is not var-len
135            array_concat_is_var_len: false,
136            // PostgreSQL doesn't support MEDIAN
137            supports_median: false,
138            // PostgreSQL requires JSON type for extraction
139            json_type_required_for_extraction: true,
140            // PostgreSQL LIKE property inside schema
141            like_property_inside_schema: true,
142            ..Default::default()
143        }
144    }
145
146    #[cfg(feature = "transpile")]
147
148    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
149        match expr {
150            // ============================================
151            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
152            // These are handled specially - transform DataType variants
153            // ============================================
154            Expression::DataType(dt) => self.transform_data_type(dt),
155
156            // ============================================
157            // NULL HANDLING
158            // ============================================
159            // IFNULL -> COALESCE in PostgreSQL
160            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
161                original_name: None,
162                expressions: vec![f.this, f.expression],
163                inferred_type: None,
164            }))),
165
166            // NVL -> COALESCE in PostgreSQL
167            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
168                original_name: None,
169                expressions: vec![f.this, f.expression],
170                inferred_type: None,
171            }))),
172
173            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
174            // so it outputs as COALESCE instead of the original function name
175            Expression::Coalesce(mut f) => {
176                f.original_name = None;
177                Ok(Expression::Coalesce(f))
178            }
179
180            // ============================================
181            // CAST OPERATIONS
182            // ============================================
183            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
184            Expression::TryCast(c) => Ok(Expression::Cast(c)),
185
186            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
187            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
188
189            // ============================================
190            // RANDOM
191            // ============================================
192            // RAND -> RANDOM in PostgreSQL
193            Expression::Rand(r) => {
194                // PostgreSQL's RANDOM() doesn't take a seed argument
195                let _ = r.seed; // Ignore seed
196                Ok(Expression::Random(crate::expressions::Random))
197            }
198
199            // ============================================
200            // UUID
201            // ============================================
202            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
203            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
204                "GEN_RANDOM_UUID".to_string(),
205                vec![],
206            )))),
207
208            // ============================================
209            // ARRAY OPERATIONS
210            // ============================================
211            // EXPLODE -> UNNEST in PostgreSQL
212            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
213                crate::expressions::UnnestFunc {
214                    this: f.this,
215                    expressions: Vec::new(),
216                    with_ordinality: false,
217                    alias: None,
218                    offset_alias: None,
219                },
220            ))),
221
222            // ExplodeOuter -> UNNEST in PostgreSQL
223            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
224                crate::expressions::UnnestFunc {
225                    this: f.this,
226                    expressions: Vec::new(),
227                    with_ordinality: false,
228                    alias: None,
229                    offset_alias: None,
230                },
231            ))),
232
233            // ArrayConcat -> ARRAY_CAT in PostgreSQL
234            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
235                "ARRAY_CAT".to_string(),
236                f.expressions,
237            )))),
238
239            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
240            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
241                "ARRAY_PREPEND".to_string(),
242                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
243            )))),
244
245            // ============================================
246            // BITWISE OPERATIONS
247            // ============================================
248            // BitwiseXor -> # operator in PostgreSQL
249            Expression::BitwiseXor(f) => {
250                // Use a special marker that generator will recognize
251                Ok(Expression::Function(Box::new(Function::new(
252                    "__PG_BITWISE_XOR__".to_string(),
253                    vec![f.left, f.right],
254                ))))
255            }
256
257            // BitwiseAndAgg -> BIT_AND
258            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
259                "BIT_AND".to_string(),
260                vec![f.this],
261            )))),
262
263            // BitwiseOrAgg -> BIT_OR
264            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
265                "BIT_OR".to_string(),
266                vec![f.this],
267            )))),
268
269            // BitwiseXorAgg -> BIT_XOR
270            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
271                "BIT_XOR".to_string(),
272                vec![f.this],
273            )))),
274
275            // ============================================
276            // BOOLEAN AGGREGATES
277            // ============================================
278            // LogicalAnd -> BOOL_AND
279            Expression::LogicalAnd(f) => {
280                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
281                    name: "BOOL_AND".to_string(),
282                    args: vec![f.this],
283                    distinct: f.distinct,
284                    filter: f.filter,
285                    order_by: f.order_by,
286                    limit: f.limit,
287                    ignore_nulls: f.ignore_nulls,
288                    inferred_type: f.inferred_type,
289                })))
290            }
291
292            // LogicalOr -> BOOL_OR
293            Expression::LogicalOr(f) => {
294                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
295                    name: "BOOL_OR".to_string(),
296                    args: vec![f.this],
297                    distinct: f.distinct,
298                    filter: f.filter,
299                    order_by: f.order_by,
300                    limit: f.limit,
301                    ignore_nulls: f.ignore_nulls,
302                    inferred_type: f.inferred_type,
303                })))
304            }
305
306            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
307            Expression::Xor(f) => {
308                if let (Some(a), Some(b)) = (f.this, f.expression) {
309                    Ok(Expression::Neq(Box::new(BinaryOp {
310                        left: *a,
311                        right: *b,
312                        left_comments: Vec::new(),
313                        operator_comments: Vec::new(),
314                        trailing_comments: Vec::new(),
315                        inferred_type: None,
316                    })))
317                } else {
318                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
319                }
320            }
321
322            // ============================================
323            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
324            // ============================================
325            // RegexpLike -> keep as-is, generator handles ~ operator output
326            Expression::RegexpLike(f) => {
327                // Generator will output as: expr ~ pattern
328                Ok(Expression::RegexpLike(f))
329            }
330
331            // ============================================
332            // DATE/TIME FUNCTIONS
333            // ============================================
334            // DateAdd -> date + INTERVAL in PostgreSQL
335            Expression::DateAdd(f) => {
336                let is_literal = matches!(&f.interval, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_)));
337                let right_expr = if is_literal {
338                    // Literal value: INTERVAL 'value' unit
339                    Expression::Interval(Box::new(Interval {
340                        this: Some(f.interval),
341                        unit: Some(IntervalUnitSpec::Simple {
342                            unit: f.unit,
343                            use_plural: false,
344                        }),
345                    }))
346                } else {
347                    // Non-literal (column ref, expression): INTERVAL '1 unit' * value
348                    let unit_str = match f.unit {
349                        IntervalUnit::Year => "YEAR",
350                        IntervalUnit::Quarter => "QUARTER",
351                        IntervalUnit::Month => "MONTH",
352                        IntervalUnit::Week => "WEEK",
353                        IntervalUnit::Day => "DAY",
354                        IntervalUnit::Hour => "HOUR",
355                        IntervalUnit::Minute => "MINUTE",
356                        IntervalUnit::Second => "SECOND",
357                        IntervalUnit::Millisecond => "MILLISECOND",
358                        IntervalUnit::Microsecond => "MICROSECOND",
359                        IntervalUnit::Nanosecond => "NANOSECOND",
360                    };
361                    let interval_one = Expression::Interval(Box::new(Interval {
362                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
363                            "1 {unit_str}"
364                        ))))),
365                        unit: None,
366                    }));
367                    Expression::Mul(Box::new(BinaryOp {
368                        left: interval_one,
369                        right: f.interval,
370                        left_comments: Vec::new(),
371                        operator_comments: Vec::new(),
372                        trailing_comments: Vec::new(),
373                        inferred_type: None,
374                    }))
375                };
376                Ok(Expression::Add(Box::new(BinaryOp {
377                    left: f.this,
378                    right: right_expr,
379                    left_comments: Vec::new(),
380                    operator_comments: Vec::new(),
381                    trailing_comments: Vec::new(),
382                    inferred_type: None,
383                })))
384            }
385
386            // DateSub -> date - INTERVAL in PostgreSQL
387            Expression::DateSub(f) => {
388                let interval_expr = Expression::Interval(Box::new(Interval {
389                    this: Some(f.interval),
390                    unit: Some(IntervalUnitSpec::Simple {
391                        unit: f.unit,
392                        use_plural: false,
393                    }),
394                }));
395                Ok(Expression::Sub(Box::new(BinaryOp {
396                    left: f.this,
397                    right: interval_expr,
398                    left_comments: Vec::new(),
399                    operator_comments: Vec::new(),
400                    trailing_comments: Vec::new(),
401                    inferred_type: None,
402                })))
403            }
404
405            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
406            Expression::DateDiff(f) => {
407                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
408                // matching the 3-arg string-based DATEDIFF handler below
409                let unit = f.unit.unwrap_or(IntervalUnit::Day);
410
411                // Helper: CAST(expr AS TIMESTAMP)
412                let cast_ts = |e: Expression| -> Expression {
413                    Expression::Cast(Box::new(Cast {
414                        this: e,
415                        to: DataType::Timestamp {
416                            precision: None,
417                            timezone: false,
418                        },
419                        trailing_comments: Vec::new(),
420                        double_colon_syntax: false,
421                        format: None,
422                        default: None,
423                        inferred_type: None,
424                    }))
425                };
426
427                // Helper: CAST(expr AS BIGINT)
428                let cast_bigint = |e: Expression| -> Expression {
429                    Expression::Cast(Box::new(Cast {
430                        this: e,
431                        to: DataType::BigInt { length: None },
432                        trailing_comments: Vec::new(),
433                        double_colon_syntax: false,
434                        format: None,
435                        default: None,
436                        inferred_type: None,
437                    }))
438                };
439
440                // Clone end/start for reuse
441                let end_expr = f.this;
442                let start = f.expression;
443
444                // Helper: end_ts - start_ts
445                let ts_diff = || -> Expression {
446                    Expression::Sub(Box::new(BinaryOp::new(
447                        cast_ts(end_expr.clone()),
448                        cast_ts(start.clone()),
449                    )))
450                };
451
452                // Helper: AGE(end_ts, start_ts)
453                let age_call = || -> Expression {
454                    Expression::Function(Box::new(Function::new(
455                        "AGE".to_string(),
456                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
457                    )))
458                };
459
460                // Helper: EXTRACT(field FROM expr)
461                let extract = |field: DateTimeField, from: Expression| -> Expression {
462                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
463                };
464
465                // Helper: number literal
466                let num = |n: i64| -> Expression {
467                    Expression::Literal(Box::new(Literal::Number(n.to_string())))
468                };
469
470                let epoch_field = DateTimeField::Custom("epoch".to_string());
471
472                let result = match unit {
473                    IntervalUnit::Nanosecond => {
474                        let epoch = extract(epoch_field.clone(), ts_diff());
475                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
476                            epoch,
477                            num(1000000000),
478                        ))))
479                    }
480                    IntervalUnit::Microsecond => {
481                        let epoch = extract(epoch_field, ts_diff());
482                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
483                            epoch,
484                            num(1000000),
485                        ))))
486                    }
487                    IntervalUnit::Millisecond => {
488                        let epoch = extract(epoch_field, ts_diff());
489                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
490                    }
491                    IntervalUnit::Second => {
492                        let epoch = extract(epoch_field, ts_diff());
493                        cast_bigint(epoch)
494                    }
495                    IntervalUnit::Minute => {
496                        let epoch = extract(epoch_field, ts_diff());
497                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
498                    }
499                    IntervalUnit::Hour => {
500                        let epoch = extract(epoch_field, ts_diff());
501                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
502                    }
503                    IntervalUnit::Day => {
504                        let epoch = extract(epoch_field, ts_diff());
505                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
506                    }
507                    IntervalUnit::Week => {
508                        let diff_parens = Expression::Paren(Box::new(Paren {
509                            this: ts_diff(),
510                            trailing_comments: Vec::new(),
511                        }));
512                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
513                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
514                    }
515                    IntervalUnit::Month => {
516                        let year_part =
517                            extract(DateTimeField::Custom("year".to_string()), age_call());
518                        let month_part =
519                            extract(DateTimeField::Custom("month".to_string()), age_call());
520                        let year_months =
521                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
522                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
523                            year_months,
524                            month_part,
525                        ))))
526                    }
527                    IntervalUnit::Quarter => {
528                        let year_part =
529                            extract(DateTimeField::Custom("year".to_string()), age_call());
530                        let month_part =
531                            extract(DateTimeField::Custom("month".to_string()), age_call());
532                        let year_quarters =
533                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
534                        let month_quarters =
535                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
536                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
537                            year_quarters,
538                            month_quarters,
539                        ))))
540                    }
541                    IntervalUnit::Year => cast_bigint(extract(
542                        DateTimeField::Custom("year".to_string()),
543                        age_call(),
544                    )),
545                };
546                Ok(result)
547            }
548
549            // UnixToTime -> TO_TIMESTAMP
550            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
551                "TO_TIMESTAMP".to_string(),
552                vec![*f.this],
553            )))),
554
555            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
556            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
557                "DATE_PART".to_string(),
558                vec![Expression::string("epoch"), f.this],
559            )))),
560
561            // StrToTime -> TO_TIMESTAMP in PostgreSQL
562            Expression::ToTimestamp(f) => {
563                let mut args = vec![f.this];
564                if let Some(fmt) = f.format {
565                    args.push(fmt);
566                }
567                Ok(Expression::Function(Box::new(Function::new(
568                    "TO_TIMESTAMP".to_string(),
569                    args,
570                ))))
571            }
572
573            // StrToDate -> TO_DATE in PostgreSQL
574            Expression::ToDate(f) => {
575                let mut args = vec![f.this];
576                if let Some(fmt) = f.format {
577                    args.push(fmt);
578                }
579                Ok(Expression::Function(Box::new(Function::new(
580                    "TO_DATE".to_string(),
581                    args,
582                ))))
583            }
584
585            // TimestampTrunc -> DATE_TRUNC
586            Expression::TimestampTrunc(f) => {
587                // Convert DateTimeField to string expression for DATE_TRUNC
588                let unit_str = format!("{:?}", f.unit).to_lowercase();
589                let args = vec![Expression::string(&unit_str), f.this];
590                Ok(Expression::Function(Box::new(Function::new(
591                    "DATE_TRUNC".to_string(),
592                    args,
593                ))))
594            }
595
596            // TimeFromParts -> MAKE_TIME
597            Expression::TimeFromParts(f) => {
598                let mut args = Vec::new();
599                if let Some(h) = f.hour {
600                    args.push(*h);
601                }
602                if let Some(m) = f.min {
603                    args.push(*m);
604                }
605                if let Some(s) = f.sec {
606                    args.push(*s);
607                }
608                Ok(Expression::Function(Box::new(Function::new(
609                    "MAKE_TIME".to_string(),
610                    args,
611                ))))
612            }
613
614            // TimestampFromParts -> MAKE_TIMESTAMP
615            Expression::MakeTimestamp(f) => {
616                // MakeTimestampFunc has direct Expression fields, not Options
617                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
618                Ok(Expression::Function(Box::new(Function::new(
619                    "MAKE_TIMESTAMP".to_string(),
620                    args,
621                ))))
622            }
623
624            // ============================================
625            // STRING FUNCTIONS
626            // ============================================
627            // StringAgg is native to PostgreSQL - keep as-is
628            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
629
630            // GroupConcat -> STRING_AGG in PostgreSQL
631            Expression::GroupConcat(f) => {
632                let mut args = vec![f.this.clone()];
633                if let Some(sep) = f.separator.clone() {
634                    args.push(sep);
635                } else {
636                    args.push(Expression::string(","));
637                }
638                Ok(Expression::Function(Box::new(Function::new(
639                    "STRING_AGG".to_string(),
640                    args,
641                ))))
642            }
643
644            // StrPosition -> POSITION function
645            Expression::Position(f) => {
646                // PostgreSQL: POSITION(substring IN string)
647                // Keep as Position, generator handles it
648                Ok(Expression::Position(f))
649            }
650
651            // ============================================
652            // AGGREGATE FUNCTIONS
653            // ============================================
654            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
655            Expression::CountIf(f) => {
656                let case_expr = Expression::Case(Box::new(Case {
657                    operand: None,
658                    whens: vec![(f.this.clone(), Expression::number(1))],
659                    else_: Some(Expression::number(0)),
660                    comments: Vec::new(),
661                    inferred_type: None,
662                }));
663                Ok(Expression::Sum(Box::new(AggFunc {
664                    ignore_nulls: None,
665                    having_max: None,
666                    this: case_expr,
667                    distinct: f.distinct,
668                    filter: f.filter,
669                    order_by: Vec::new(),
670                    name: None,
671                    limit: None,
672                    inferred_type: None,
673                })))
674            }
675
676            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
677            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
678
679            // Variance -> VAR_SAMP in PostgreSQL
680            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
681                "VAR_SAMP".to_string(),
682                vec![f.this],
683            )))),
684
685            // VarPop -> VAR_POP in PostgreSQL
686            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
687                "VAR_POP".to_string(),
688                vec![f.this],
689            )))),
690
691            // ============================================
692            // JSON FUNCTIONS
693            // ============================================
694            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
695            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
696            Expression::JsonExtract(mut f) => {
697                // Only use arrow syntax for simple literal paths (string or non-negative number)
698                // Complex expressions like column references should use function form
699                f.arrow_syntax = Self::is_simple_json_path(&f.path);
700                Ok(Expression::JsonExtract(f))
701            }
702
703            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
704            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
705            // #>> (hash_arrow_syntax) stays as #>>
706            Expression::JsonExtractScalar(mut f) => {
707                if !f.hash_arrow_syntax {
708                    // Only use arrow syntax for simple literal paths (string or non-negative number)
709                    // Complex expressions like Neg(-1) should use function form
710                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
711                }
712                Ok(Expression::JsonExtractScalar(f))
713            }
714
715            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
716
717            // JSONObjectAgg -> JSON_OBJECT_AGG
718            Expression::JsonObjectAgg(f) => {
719                // JsonObjectAggFunc has key and value as Expression, not Option
720                let args = vec![f.key, f.value];
721                Ok(Expression::Function(Box::new(Function::new(
722                    "JSON_OBJECT_AGG".to_string(),
723                    args,
724                ))))
725            }
726
727            // JSONArrayAgg -> JSON_AGG
728            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
729                "JSON_AGG".to_string(),
730                vec![f.this],
731            )))),
732
733            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
734            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(
735                String::new(),
736            )))),
737
738            // ============================================
739            // MISC FUNCTIONS
740            // ============================================
741            // IntDiv -> DIV in PostgreSQL
742            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
743                "DIV".to_string(),
744                vec![f.this, f.expression],
745            )))),
746
747            // Unicode -> ASCII in PostgreSQL
748            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
749                "ASCII".to_string(),
750                vec![f.this],
751            )))),
752
753            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
754            Expression::LastDay(f) => {
755                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
756                let truncated = Expression::Function(Box::new(Function::new(
757                    "DATE_TRUNC".to_string(),
758                    vec![Expression::string("month"), f.this.clone()],
759                )));
760                let plus_month = Expression::Add(Box::new(BinaryOp {
761                    left: truncated,
762                    right: Expression::Interval(Box::new(Interval {
763                        this: Some(Expression::string("1")),
764                        unit: Some(IntervalUnitSpec::Simple {
765                            unit: IntervalUnit::Month,
766                            use_plural: false,
767                        }),
768                    })),
769                    left_comments: Vec::new(),
770                    operator_comments: Vec::new(),
771                    trailing_comments: Vec::new(),
772                    inferred_type: None,
773                }));
774                let minus_day = Expression::Sub(Box::new(BinaryOp {
775                    left: plus_month,
776                    right: Expression::Interval(Box::new(Interval {
777                        this: Some(Expression::string("1")),
778                        unit: Some(IntervalUnitSpec::Simple {
779                            unit: IntervalUnit::Day,
780                            use_plural: false,
781                        }),
782                    })),
783                    left_comments: Vec::new(),
784                    operator_comments: Vec::new(),
785                    trailing_comments: Vec::new(),
786                    inferred_type: None,
787                }));
788                Ok(Expression::Cast(Box::new(Cast {
789                    this: minus_day,
790                    to: DataType::Date,
791                    trailing_comments: Vec::new(),
792                    double_colon_syntax: true, // Use PostgreSQL :: syntax
793                    format: None,
794                    default: None,
795                    inferred_type: None,
796                })))
797            }
798
799            // GenerateSeries is native to PostgreSQL
800            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
801
802            // ExplodingGenerateSeries -> GENERATE_SERIES
803            Expression::ExplodingGenerateSeries(f) => {
804                let mut args = vec![f.start, f.stop];
805                if let Some(step) = f.step {
806                    args.push(step); // step is Expression, not Box<Expression>
807                }
808                Ok(Expression::Function(Box::new(Function::new(
809                    "GENERATE_SERIES".to_string(),
810                    args,
811                ))))
812            }
813
814            // ============================================
815            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
816            // ============================================
817            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
818            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
819                name: "CURRENT_TIMESTAMP".to_string(),
820                args: vec![],
821                distinct: false,
822                trailing_comments: vec![],
823                use_bracket_syntax: false,
824                no_parens: true,
825                quoted: false,
826                span: None,
827                inferred_type: None,
828            }))),
829
830            // CurrentUser -> CURRENT_USER (no parens)
831            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
832                "CURRENT_USER".to_string(),
833                vec![],
834            )))),
835
836            // CurrentDate -> CURRENT_DATE (no parens)
837            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
838                name: "CURRENT_DATE".to_string(),
839                args: vec![],
840                distinct: false,
841                trailing_comments: vec![],
842                use_bracket_syntax: false,
843                no_parens: true,
844                quoted: false,
845                span: None,
846                inferred_type: None,
847            }))),
848
849            // ============================================
850            // JOIN TRANSFORMATIONS
851            // ============================================
852            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
853            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
854                Ok(Expression::Join(Box::new(Join {
855                    this: join.this,
856                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
857                    using: join.using,
858                    kind: JoinKind::CrossApply,
859                    use_inner_keyword: false,
860                    use_outer_keyword: false,
861                    deferred_condition: false,
862                    join_hint: None,
863                    match_condition: None,
864                    pivots: join.pivots,
865                    comments: join.comments,
866                    nesting_group: 0,
867                    directed: false,
868                })))
869            }
870
871            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
872            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
873                Ok(Expression::Join(Box::new(Join {
874                    this: join.this,
875                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
876                    using: join.using,
877                    kind: JoinKind::OuterApply,
878                    use_inner_keyword: false,
879                    use_outer_keyword: false,
880                    deferred_condition: false,
881                    join_hint: None,
882                    match_condition: None,
883                    pivots: join.pivots,
884                    comments: join.comments,
885                    nesting_group: 0,
886                    directed: false,
887                })))
888            }
889
890            // ============================================
891            // GENERIC FUNCTION TRANSFORMATIONS
892            // ============================================
893            Expression::Function(f) => self.transform_function(*f),
894
895            // Generic aggregate function transformations
896            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
897
898            // ===== Context-aware JSON arrow wrapping =====
899            // When JSON arrow expressions appear in Binary/In/Not contexts,
900            // they need to be wrapped in parentheses for correct precedence.
901            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
902
903            // Binary operators that need JSON wrapping
904            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
905                left: wrap_if_json_arrow(op.left),
906                right: wrap_if_json_arrow(op.right),
907                ..*op
908            }))),
909            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
910                left: wrap_if_json_arrow(op.left),
911                right: wrap_if_json_arrow(op.right),
912                ..*op
913            }))),
914            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
915                left: wrap_if_json_arrow(op.left),
916                right: wrap_if_json_arrow(op.right),
917                ..*op
918            }))),
919            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
920                left: wrap_if_json_arrow(op.left),
921                right: wrap_if_json_arrow(op.right),
922                ..*op
923            }))),
924            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
925                left: wrap_if_json_arrow(op.left),
926                right: wrap_if_json_arrow(op.right),
927                ..*op
928            }))),
929            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
930                left: wrap_if_json_arrow(op.left),
931                right: wrap_if_json_arrow(op.right),
932                ..*op
933            }))),
934
935            // In expression - wrap the this part if it's JSON arrow
936            Expression::In(mut i) => {
937                i.this = wrap_if_json_arrow(i.this);
938                Ok(Expression::In(i))
939            }
940
941            // Not expression - wrap the this part if it's JSON arrow
942            Expression::Not(mut n) => {
943                n.this = wrap_if_json_arrow(n.this);
944                Ok(Expression::Not(n))
945            }
946
947            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
948            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
949            Expression::Merge(m) => Ok(Expression::Merge(m)),
950
951            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
952            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
953                // Convert path from bracketed format to simple key
954                // e.g., '["fr''uit"]' -> 'fr''uit'
955                let path = match *je.expression {
956                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
957                        let Literal::String(s) = lit.as_ref() else {
958                            unreachable!()
959                        };
960                        // Strip bracketed JSON path format: ["key"] -> key
961                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
962                            s[2..s.len() - 2].to_string()
963                        } else {
964                            s.clone()
965                        };
966                        Expression::Literal(Box::new(Literal::String(cleaned)))
967                    }
968                    other => other,
969                };
970                Ok(Expression::Function(Box::new(Function::new(
971                    "JSON_EXTRACT_PATH".to_string(),
972                    vec![*je.this, path],
973                ))))
974            }
975
976            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
977            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
978                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
979                    this: t.this,
980                    characters: t.characters,
981                    position: t.position,
982                    sql_standard_syntax: true,
983                    position_explicit: t.position_explicit,
984                })))
985            }
986
987            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
988            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => {
989                let Literal::ByteString(s) = lit.as_ref() else {
990                    unreachable!()
991                };
992                Ok(Expression::Cast(Box::new(Cast {
993                    this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
994                    to: DataType::VarBinary { length: None },
995                    trailing_comments: Vec::new(),
996                    double_colon_syntax: false,
997                    format: None,
998                    default: None,
999                    inferred_type: None,
1000                })))
1001            }
1002
1003            // Pass through everything else
1004            _ => Ok(expr),
1005        }
1006    }
1007}
1008
1009#[cfg(feature = "transpile")]
1010impl PostgresDialect {
1011    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
1012    /// Simple paths can use arrow syntax (->>) in PostgreSQL
1013    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
1014    fn is_simple_json_path(path: &Expression) -> bool {
1015        match path {
1016            // String literals are always simple
1017            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
1018            // Non-negative integer literals are simple
1019            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
1020                let Literal::Number(n) = lit.as_ref() else {
1021                    unreachable!()
1022                };
1023                // Check if it's non-negative
1024                !n.starts_with('-')
1025            }
1026            // JSONPath expressions are simple (they're already parsed paths)
1027            Expression::JSONPath(_) => true,
1028            // Everything else (Neg, function calls, etc.) is complex
1029            _ => false,
1030        }
1031    }
1032
1033    /// Transform data types according to PostgreSQL TYPE_MAPPING
1034    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
1035        let transformed = match dt {
1036            // TINYINT -> SMALLINT
1037            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
1038
1039            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
1040            DataType::Float { .. } => DataType::Custom {
1041                name: "DOUBLE PRECISION".to_string(),
1042            },
1043
1044            // DOUBLE -> DOUBLE PRECISION
1045            DataType::Double { .. } => DataType::Custom {
1046                name: "DOUBLE PRECISION".to_string(),
1047            },
1048
1049            // BINARY -> BYTEA (handled by generator which preserves length)
1050            DataType::Binary { .. } => dt,
1051
1052            // VARBINARY -> BYTEA (handled by generator which preserves length)
1053            DataType::VarBinary { .. } => dt,
1054
1055            // BLOB -> BYTEA
1056            DataType::Blob => DataType::Custom {
1057                name: "BYTEA".to_string(),
1058            },
1059
1060            // Custom type normalizations
1061            DataType::Custom { ref name } => {
1062                let upper = name.to_uppercase();
1063                match upper.as_str() {
1064                    // INT8 -> BIGINT (PostgreSQL alias)
1065                    "INT8" => DataType::BigInt { length: None },
1066                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1067                    "FLOAT8" => DataType::Custom {
1068                        name: "DOUBLE PRECISION".to_string(),
1069                    },
1070                    // FLOAT4 -> REAL (PostgreSQL alias)
1071                    "FLOAT4" => DataType::Custom {
1072                        name: "REAL".to_string(),
1073                    },
1074                    // INT4 -> INTEGER (PostgreSQL alias)
1075                    "INT4" => DataType::Int {
1076                        length: None,
1077                        integer_spelling: false,
1078                    },
1079                    // INT2 -> SMALLINT (PostgreSQL alias)
1080                    "INT2" => DataType::SmallInt { length: None },
1081                    _ => dt,
1082                }
1083            }
1084
1085            // Keep all other types as-is
1086            other => other,
1087        };
1088        Ok(Expression::DataType(transformed))
1089    }
1090
1091    fn transform_function(&self, f: Function) -> Result<Expression> {
1092        let name_upper = f.name.to_uppercase();
1093        match name_upper.as_str() {
1094            // IFNULL -> COALESCE
1095            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1096                original_name: None,
1097                expressions: f.args,
1098                inferred_type: None,
1099            }))),
1100
1101            // NVL -> COALESCE
1102            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1103                original_name: None,
1104                expressions: f.args,
1105                inferred_type: None,
1106            }))),
1107
1108            // ISNULL (SQL Server) -> COALESCE
1109            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1110                original_name: None,
1111                expressions: f.args,
1112                inferred_type: None,
1113            }))),
1114
1115            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1116            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1117                Function::new("STRING_AGG".to_string(), f.args),
1118            ))),
1119
1120            // SUBSTR -> SUBSTRING (standard SQL)
1121            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1122                "SUBSTRING".to_string(),
1123                f.args,
1124            )))),
1125
1126            // RAND -> RANDOM in PostgreSQL
1127            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1128
1129            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1130            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1131                this: f.args.into_iter().next().unwrap(),
1132                decimals: None,
1133                to: None,
1134            }))),
1135
1136            // LEN -> LENGTH in PostgreSQL
1137            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1138                this: f.args.into_iter().next().unwrap(),
1139                original_name: None,
1140                inferred_type: None,
1141            }))),
1142
1143            // CHAR_LENGTH -> LENGTH in PostgreSQL
1144            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1145                this: f.args.into_iter().next().unwrap(),
1146                original_name: None,
1147                inferred_type: None,
1148            }))),
1149
1150            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1151            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1152                Ok(Expression::Length(Box::new(UnaryFunc {
1153                    this: f.args.into_iter().next().unwrap(),
1154                    original_name: None,
1155                    inferred_type: None,
1156                })))
1157            }
1158
1159            // CHARINDEX -> POSITION in PostgreSQL
1160            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1161            "CHARINDEX" if f.args.len() >= 2 => {
1162                let mut args = f.args;
1163                let substring = args.remove(0);
1164                let string = args.remove(0);
1165                Ok(Expression::Position(Box::new(
1166                    crate::expressions::PositionFunc {
1167                        substring,
1168                        string,
1169                        start: args.pop(),
1170                    },
1171                )))
1172            }
1173
1174            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1175            "GETDATE" => Ok(Expression::CurrentTimestamp(
1176                crate::expressions::CurrentTimestamp {
1177                    precision: None,
1178                    sysdate: false,
1179                },
1180            )),
1181
1182            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1183            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1184                crate::expressions::CurrentTimestamp {
1185                    precision: None,
1186                    sysdate: false,
1187                },
1188            )),
1189
1190            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1191            "NOW" => Ok(Expression::CurrentTimestamp(
1192                crate::expressions::CurrentTimestamp {
1193                    precision: None,
1194                    sysdate: false,
1195                },
1196            )),
1197
1198            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1199            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1200                "GEN_RANDOM_UUID".to_string(),
1201                vec![],
1202            )))),
1203
1204            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1205            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1206                "GEN_RANDOM_UUID".to_string(),
1207                vec![],
1208            )))),
1209
1210            // UNNEST is native to PostgreSQL
1211            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1212
1213            // GENERATE_SERIES is native to PostgreSQL
1214            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1215
1216            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1217            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1218                "SHA256".to_string(),
1219                f.args,
1220            )))),
1221
1222            // SHA2 -> SHA256/SHA512 based on length argument
1223            "SHA2" if f.args.len() == 2 => {
1224                // SHA2(data, length) -> SHA256/SHA384/SHA512
1225                let args = f.args;
1226                let data = args[0].clone();
1227                // Default to SHA256 - would need runtime inspection for exact mapping
1228                Ok(Expression::Function(Box::new(Function::new(
1229                    "SHA256".to_string(),
1230                    vec![data],
1231                ))))
1232            }
1233
1234            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1235            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1236
1237            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1238            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1239                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1240            ))),
1241            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1242                Function::new("LEVENSHTEIN".to_string(), f.args),
1243            ))),
1244
1245            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1246            "TRIM" if f.args.len() == 2 => {
1247                let value = f.args[0].clone();
1248                let chars = f.args[1].clone();
1249                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1250                    this: value,
1251                    characters: Some(chars),
1252                    position: crate::expressions::TrimPosition::Both,
1253                    sql_standard_syntax: true,
1254                    position_explicit: false,
1255                })))
1256            }
1257
1258            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1259            "DATEDIFF" if f.args.len() >= 2 => {
1260                let mut args = f.args;
1261                if args.len() == 2 {
1262                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1263                    let first = args.remove(0);
1264                    let second = args.remove(0);
1265                    Ok(Expression::Function(Box::new(Function::new(
1266                        "AGE".to_string(),
1267                        vec![first, second],
1268                    ))))
1269                } else {
1270                    // 3-arg form: DATEDIFF(unit, start, end)
1271                    let unit_expr = args.remove(0);
1272                    let start = args.remove(0);
1273                    let end_expr = args.remove(0);
1274
1275                    // Extract unit name from identifier or column
1276                    let unit_name = match &unit_expr {
1277                        Expression::Identifier(id) => id.name.to_uppercase(),
1278                        Expression::Var(v) => v.this.to_uppercase(),
1279                        Expression::Column(col) if col.table.is_none() => {
1280                            col.name.name.to_uppercase()
1281                        }
1282                        _ => "DAY".to_string(),
1283                    };
1284
1285                    // Helper: CAST(expr AS TIMESTAMP)
1286                    let cast_ts = |e: Expression| -> Expression {
1287                        Expression::Cast(Box::new(Cast {
1288                            this: e,
1289                            to: DataType::Timestamp {
1290                                precision: None,
1291                                timezone: false,
1292                            },
1293                            trailing_comments: Vec::new(),
1294                            double_colon_syntax: false,
1295                            format: None,
1296                            default: None,
1297                            inferred_type: None,
1298                        }))
1299                    };
1300
1301                    // Helper: CAST(expr AS BIGINT)
1302                    let cast_bigint = |e: Expression| -> Expression {
1303                        Expression::Cast(Box::new(Cast {
1304                            this: e,
1305                            to: DataType::BigInt { length: None },
1306                            trailing_comments: Vec::new(),
1307                            double_colon_syntax: false,
1308                            format: None,
1309                            default: None,
1310                            inferred_type: None,
1311                        }))
1312                    };
1313
1314                    let end_ts = cast_ts(end_expr.clone());
1315                    let start_ts = cast_ts(start.clone());
1316
1317                    // Helper: end_ts - start_ts
1318                    let ts_diff = || -> Expression {
1319                        Expression::Sub(Box::new(BinaryOp::new(
1320                            cast_ts(end_expr.clone()),
1321                            cast_ts(start.clone()),
1322                        )))
1323                    };
1324
1325                    // Helper: AGE(end_ts, start_ts)
1326                    let age_call = || -> Expression {
1327                        Expression::Function(Box::new(Function::new(
1328                            "AGE".to_string(),
1329                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1330                        )))
1331                    };
1332
1333                    // Helper: EXTRACT(field FROM expr)
1334                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1335                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1336                    };
1337
1338                    // Helper: number literal
1339                    let num = |n: i64| -> Expression {
1340                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1341                    };
1342
1343                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1344                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1345
1346                    let result = match unit_name.as_str() {
1347                        "MICROSECOND" => {
1348                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1349                            let epoch = extract(epoch_field, ts_diff());
1350                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1351                                epoch,
1352                                num(1000000),
1353                            ))))
1354                        }
1355                        "MILLISECOND" => {
1356                            let epoch = extract(epoch_field, ts_diff());
1357                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1358                        }
1359                        "SECOND" => {
1360                            let epoch = extract(epoch_field, ts_diff());
1361                            cast_bigint(epoch)
1362                        }
1363                        "MINUTE" => {
1364                            let epoch = extract(epoch_field, ts_diff());
1365                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1366                        }
1367                        "HOUR" => {
1368                            let epoch = extract(epoch_field, ts_diff());
1369                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1370                        }
1371                        "DAY" => {
1372                            let epoch = extract(epoch_field, ts_diff());
1373                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1374                        }
1375                        "WEEK" => {
1376                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1377                            let diff_parens = Expression::Paren(Box::new(Paren {
1378                                this: ts_diff(),
1379                                trailing_comments: Vec::new(),
1380                            }));
1381                            let days =
1382                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1383                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1384                        }
1385                        "MONTH" => {
1386                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1387                            let year_part =
1388                                extract(DateTimeField::Custom("year".to_string()), age_call());
1389                            let month_part =
1390                                extract(DateTimeField::Custom("month".to_string()), age_call());
1391                            let year_months =
1392                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1393                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1394                                year_months,
1395                                month_part,
1396                            ))))
1397                        }
1398                        "QUARTER" => {
1399                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1400                            let year_part =
1401                                extract(DateTimeField::Custom("year".to_string()), age_call());
1402                            let month_part =
1403                                extract(DateTimeField::Custom("month".to_string()), age_call());
1404                            let year_quarters =
1405                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1406                            let month_quarters =
1407                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1408                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1409                                year_quarters,
1410                                month_quarters,
1411                            ))))
1412                        }
1413                        "YEAR" => {
1414                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1415                            cast_bigint(extract(
1416                                DateTimeField::Custom("year".to_string()),
1417                                age_call(),
1418                            ))
1419                        }
1420                        _ => {
1421                            // Fallback: simple AGE
1422                            Expression::Function(Box::new(Function::new(
1423                                "AGE".to_string(),
1424                                vec![end_ts, start_ts],
1425                            )))
1426                        }
1427                    };
1428                    Ok(result)
1429                }
1430            }
1431
1432            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1433            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1434                let mut args = f.args;
1435                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1436                let start = args.remove(0);
1437                let end = args.remove(0);
1438                Ok(Expression::Function(Box::new(Function::new(
1439                    "AGE".to_string(),
1440                    vec![end, start],
1441                ))))
1442            }
1443
1444            // FROM_UNIXTIME -> TO_TIMESTAMP
1445            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1446                "TO_TIMESTAMP".to_string(),
1447                f.args,
1448            )))),
1449
1450            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1451            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1452                let arg = f.args.into_iter().next().unwrap();
1453                Ok(Expression::Function(Box::new(Function::new(
1454                    "DATE_PART".to_string(),
1455                    vec![Expression::string("epoch"), arg],
1456                ))))
1457            }
1458
1459            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1460            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1461                Ok(Expression::Function(Box::new(Function::new(
1462                    "DATE_PART".to_string(),
1463                    vec![
1464                        Expression::string("epoch"),
1465                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1466                            precision: None,
1467                            sysdate: false,
1468                        }),
1469                    ],
1470                ))))
1471            }
1472
1473            // DATEADD -> date + interval pattern
1474            "DATEADD" if f.args.len() == 3 => {
1475                // DATEADD(unit, count, date) -> date + interval 'count unit'
1476                // This is a simplified version - full impl would construct proper interval
1477                let mut args = f.args;
1478                let _unit = args.remove(0);
1479                let count = args.remove(0);
1480                let date = args.remove(0);
1481                Ok(Expression::Add(Box::new(BinaryOp {
1482                    left: date,
1483                    right: count,
1484                    left_comments: Vec::new(),
1485                    operator_comments: Vec::new(),
1486                    trailing_comments: Vec::new(),
1487                    inferred_type: None,
1488                })))
1489            }
1490
1491            // INSTR -> POSITION (simplified)
1492            "INSTR" if f.args.len() >= 2 => {
1493                let mut args = f.args;
1494                let string = args.remove(0);
1495                let substring = args.remove(0);
1496                Ok(Expression::Position(Box::new(
1497                    crate::expressions::PositionFunc {
1498                        substring,
1499                        string,
1500                        start: args.pop(),
1501                    },
1502                )))
1503            }
1504
1505            // CONCAT_WS is native to PostgreSQL
1506            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1507
1508            // REGEXP_REPLACE: pass through without adding 'g' flag
1509            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1510            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1511                Ok(Expression::Function(Box::new(f)))
1512            }
1513            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1514            // If occurrence is 0 (global), append 'g' to flags
1515            "REGEXP_REPLACE" if f.args.len() == 6 => {
1516                let is_global = match &f.args[4] {
1517                    Expression::Literal(lit)
1518                        if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) =>
1519                    {
1520                        let crate::expressions::Literal::Number(n) = lit.as_ref() else {
1521                            unreachable!()
1522                        };
1523                        n == "0"
1524                    }
1525                    _ => false,
1526                };
1527                if is_global {
1528                    let subject = f.args[0].clone();
1529                    let pattern = f.args[1].clone();
1530                    let replacement = f.args[2].clone();
1531                    let position = f.args[3].clone();
1532                    let occurrence = f.args[4].clone();
1533                    let params = &f.args[5];
1534                    let mut flags = if let Expression::Literal(lit) = params {
1535                        if let crate::expressions::Literal::String(s) = lit.as_ref() {
1536                            s.clone()
1537                        } else {
1538                            String::new()
1539                        }
1540                    } else {
1541                        String::new()
1542                    };
1543                    if !flags.contains('g') {
1544                        flags.push('g');
1545                    }
1546                    Ok(Expression::Function(Box::new(Function::new(
1547                        "REGEXP_REPLACE".to_string(),
1548                        vec![
1549                            subject,
1550                            pattern,
1551                            replacement,
1552                            position,
1553                            occurrence,
1554                            Expression::Literal(Box::new(crate::expressions::Literal::String(
1555                                flags,
1556                            ))),
1557                        ],
1558                    ))))
1559                } else {
1560                    Ok(Expression::Function(Box::new(f)))
1561                }
1562            }
1563            // Default: pass through
1564            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1565
1566            // Pass through everything else
1567            _ => Ok(Expression::Function(Box::new(f))),
1568        }
1569    }
1570
1571    fn transform_aggregate_function(
1572        &self,
1573        f: Box<crate::expressions::AggregateFunction>,
1574    ) -> Result<Expression> {
1575        let name_upper = f.name.to_uppercase();
1576        match name_upper.as_str() {
1577            // COUNT_IF -> SUM(CASE WHEN...)
1578            "COUNT_IF" if !f.args.is_empty() => {
1579                let condition = f.args.into_iter().next().unwrap();
1580                let case_expr = Expression::Case(Box::new(Case {
1581                    operand: None,
1582                    whens: vec![(condition, Expression::number(1))],
1583                    else_: Some(Expression::number(0)),
1584                    comments: Vec::new(),
1585                    inferred_type: None,
1586                }));
1587                Ok(Expression::Sum(Box::new(AggFunc {
1588                    ignore_nulls: None,
1589                    having_max: None,
1590                    this: case_expr,
1591                    distinct: f.distinct,
1592                    filter: f.filter,
1593                    order_by: Vec::new(),
1594                    name: None,
1595                    limit: None,
1596                    inferred_type: None,
1597                })))
1598            }
1599
1600            // GROUP_CONCAT -> STRING_AGG
1601            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1602                Function::new("STRING_AGG".to_string(), f.args),
1603            ))),
1604
1605            // STDEV -> STDDEV in PostgreSQL
1606            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1607                ignore_nulls: None,
1608                having_max: None,
1609                this: f.args.into_iter().next().unwrap(),
1610                distinct: f.distinct,
1611                filter: f.filter,
1612                order_by: Vec::new(),
1613                name: None,
1614                limit: None,
1615                inferred_type: None,
1616            }))),
1617
1618            // STDEVP -> STDDEV_POP in PostgreSQL
1619            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1620                ignore_nulls: None,
1621                having_max: None,
1622                this: f.args.into_iter().next().unwrap(),
1623                distinct: f.distinct,
1624                filter: f.filter,
1625                order_by: Vec::new(),
1626                name: None,
1627                limit: None,
1628                inferred_type: None,
1629            }))),
1630
1631            // VAR -> VAR_SAMP in PostgreSQL
1632            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1633                ignore_nulls: None,
1634                having_max: None,
1635                this: f.args.into_iter().next().unwrap(),
1636                distinct: f.distinct,
1637                filter: f.filter,
1638                order_by: Vec::new(),
1639                name: None,
1640                limit: None,
1641                inferred_type: None,
1642            }))),
1643
1644            // VARP -> VAR_POP in PostgreSQL
1645            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1646                ignore_nulls: None,
1647                having_max: None,
1648                this: f.args.into_iter().next().unwrap(),
1649                distinct: f.distinct,
1650                filter: f.filter,
1651                order_by: Vec::new(),
1652                name: None,
1653                limit: None,
1654                inferred_type: None,
1655            }))),
1656
1657            // BIT_AND is native to PostgreSQL
1658            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1659
1660            // BIT_OR is native to PostgreSQL
1661            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1662
1663            // BIT_XOR is native to PostgreSQL
1664            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1665
1666            // BOOL_AND is native to PostgreSQL
1667            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1668
1669            // BOOL_OR is native to PostgreSQL
1670            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1671
1672            // VARIANCE -> VAR_SAMP in PostgreSQL
1673            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1674                ignore_nulls: None,
1675                having_max: None,
1676                this: f.args.into_iter().next().unwrap(),
1677                distinct: f.distinct,
1678                filter: f.filter,
1679                order_by: Vec::new(),
1680                name: None,
1681                limit: None,
1682                inferred_type: None,
1683            }))),
1684
1685            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1686            "LOGICAL_OR" if !f.args.is_empty() => {
1687                let mut new_agg = f.clone();
1688                new_agg.name = "BOOL_OR".to_string();
1689                Ok(Expression::AggregateFunction(new_agg))
1690            }
1691
1692            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1693            "LOGICAL_AND" if !f.args.is_empty() => {
1694                let mut new_agg = f.clone();
1695                new_agg.name = "BOOL_AND".to_string();
1696                Ok(Expression::AggregateFunction(new_agg))
1697            }
1698
1699            // Pass through everything else
1700            _ => Ok(Expression::AggregateFunction(f)),
1701        }
1702    }
1703}
1704
1705#[cfg(test)]
1706mod tests {
1707    use super::*;
1708    use crate::dialects::Dialect;
1709
1710    fn transpile_to_postgres(sql: &str) -> String {
1711        let dialect = Dialect::get(DialectType::Generic);
1712        let result = dialect
1713            .transpile(sql, DialectType::PostgreSQL)
1714            .expect("Transpile failed");
1715        result[0].clone()
1716    }
1717
1718    #[test]
1719    fn test_ifnull_to_coalesce() {
1720        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1721        assert!(
1722            result.contains("COALESCE"),
1723            "Expected COALESCE, got: {}",
1724            result
1725        );
1726    }
1727
1728    #[test]
1729    fn test_nvl_to_coalesce() {
1730        let result = transpile_to_postgres("SELECT NVL(a, b)");
1731        assert!(
1732            result.contains("COALESCE"),
1733            "Expected COALESCE, got: {}",
1734            result
1735        );
1736    }
1737
1738    #[test]
1739    fn test_rand_to_random() {
1740        let result = transpile_to_postgres("SELECT RAND()");
1741        assert!(
1742            result.contains("RANDOM"),
1743            "Expected RANDOM, got: {}",
1744            result
1745        );
1746    }
1747
1748    #[test]
1749    fn test_basic_select() {
1750        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1751        assert!(result.contains("SELECT"));
1752        assert!(result.contains("FROM users"));
1753    }
1754
1755    #[test]
1756    fn test_len_to_length() {
1757        let result = transpile_to_postgres("SELECT LEN(name)");
1758        assert!(
1759            result.contains("LENGTH"),
1760            "Expected LENGTH, got: {}",
1761            result
1762        );
1763    }
1764
1765    #[test]
1766    fn test_getdate_to_current_timestamp() {
1767        let result = transpile_to_postgres("SELECT GETDATE()");
1768        assert!(
1769            result.contains("CURRENT_TIMESTAMP"),
1770            "Expected CURRENT_TIMESTAMP, got: {}",
1771            result
1772        );
1773    }
1774
1775    #[test]
1776    fn test_substr_to_substring() {
1777        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1778        assert!(
1779            result.contains("SUBSTRING"),
1780            "Expected SUBSTRING, got: {}",
1781            result
1782        );
1783    }
1784
1785    #[test]
1786    fn test_group_concat_to_string_agg() {
1787        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1788        assert!(
1789            result.contains("STRING_AGG"),
1790            "Expected STRING_AGG, got: {}",
1791            result
1792        );
1793    }
1794
1795    #[test]
1796    fn test_double_quote_identifiers() {
1797        // PostgreSQL uses double quotes for identifiers
1798        let dialect = PostgresDialect;
1799        let config = dialect.generator_config();
1800        assert_eq!(config.identifier_quote, '"');
1801    }
1802
1803    #[test]
1804    fn test_char_length_to_length() {
1805        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1806        assert!(
1807            result.contains("LENGTH"),
1808            "Expected LENGTH, got: {}",
1809            result
1810        );
1811    }
1812
1813    #[test]
1814    fn test_character_length_to_length() {
1815        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1816        assert!(
1817            result.contains("LENGTH"),
1818            "Expected LENGTH, got: {}",
1819            result
1820        );
1821    }
1822
1823    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1824    fn identity_postgres(sql: &str) -> String {
1825        let dialect = Dialect::get(DialectType::PostgreSQL);
1826        let exprs = dialect.parse(sql).expect("Parse failed");
1827        let transformed = dialect
1828            .transform(exprs[0].clone())
1829            .expect("Transform failed");
1830        dialect.generate(&transformed).expect("Generate failed")
1831    }
1832
1833    #[test]
1834    fn test_json_extract_with_column_path() {
1835        // When the path is a column reference (not a literal), should use function form
1836        let result = identity_postgres("json_data.data -> field_ids.field_id");
1837        assert!(
1838            result.contains("JSON_EXTRACT_PATH"),
1839            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1840            result
1841        );
1842    }
1843
1844    #[test]
1845    fn test_json_extract_scalar_with_negative_index() {
1846        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1847        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1848        assert!(
1849            result.contains("JSON_EXTRACT_PATH_TEXT"),
1850            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1851            result
1852        );
1853        // The first -> should still be arrow syntax since 'duration' is a string literal
1854        assert!(
1855            result.contains("->"),
1856            "Expected -> for string literal path, got: {}",
1857            result
1858        );
1859    }
1860
1861    #[test]
1862    fn test_json_extract_with_string_literal() {
1863        // When the path is a string literal, should keep arrow syntax
1864        let result = identity_postgres("data -> 'key'");
1865        assert!(
1866            result.contains("->"),
1867            "Expected -> for string literal path, got: {}",
1868            result
1869        );
1870        assert!(
1871            !result.contains("JSON_EXTRACT_PATH"),
1872            "Should NOT use function form for string literal, got: {}",
1873            result
1874        );
1875    }
1876}