Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145                inferred_type: None,
146            }))),
147
148            // NVL -> COALESCE in PostgreSQL
149            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
150                original_name: None,
151                expressions: vec![f.this, f.expression],
152                inferred_type: None,
153            }))),
154
155            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
156            // so it outputs as COALESCE instead of the original function name
157            Expression::Coalesce(mut f) => {
158                f.original_name = None;
159                Ok(Expression::Coalesce(f))
160            }
161
162            // ============================================
163            // CAST OPERATIONS
164            // ============================================
165            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
166            Expression::TryCast(c) => Ok(Expression::Cast(c)),
167
168            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
169            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
170
171            // ============================================
172            // RANDOM
173            // ============================================
174            // RAND -> RANDOM in PostgreSQL
175            Expression::Rand(r) => {
176                // PostgreSQL's RANDOM() doesn't take a seed argument
177                let _ = r.seed; // Ignore seed
178                Ok(Expression::Random(crate::expressions::Random))
179            }
180
181            // ============================================
182            // UUID
183            // ============================================
184            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
185            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
186                "GEN_RANDOM_UUID".to_string(),
187                vec![],
188            )))),
189
190            // ============================================
191            // ARRAY OPERATIONS
192            // ============================================
193            // EXPLODE -> UNNEST in PostgreSQL
194            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
195                crate::expressions::UnnestFunc {
196                    this: f.this,
197                    expressions: Vec::new(),
198                    with_ordinality: false,
199                    alias: None,
200                    offset_alias: None,
201                },
202            ))),
203
204            // ExplodeOuter -> UNNEST in PostgreSQL
205            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
206                crate::expressions::UnnestFunc {
207                    this: f.this,
208                    expressions: Vec::new(),
209                    with_ordinality: false,
210                    alias: None,
211                    offset_alias: None,
212                },
213            ))),
214
215            // ArrayConcat -> ARRAY_CAT in PostgreSQL
216            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
217                "ARRAY_CAT".to_string(),
218                f.expressions,
219            )))),
220
221            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
222            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
223                "ARRAY_PREPEND".to_string(),
224                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
225            )))),
226
227            // ============================================
228            // BITWISE OPERATIONS
229            // ============================================
230            // BitwiseXor -> # operator in PostgreSQL
231            Expression::BitwiseXor(f) => {
232                // Use a special marker that generator will recognize
233                Ok(Expression::Function(Box::new(Function::new(
234                    "__PG_BITWISE_XOR__".to_string(),
235                    vec![f.left, f.right],
236                ))))
237            }
238
239            // BitwiseAndAgg -> BIT_AND
240            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
241                "BIT_AND".to_string(),
242                vec![f.this],
243            )))),
244
245            // BitwiseOrAgg -> BIT_OR
246            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
247                "BIT_OR".to_string(),
248                vec![f.this],
249            )))),
250
251            // BitwiseXorAgg -> BIT_XOR
252            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
253                "BIT_XOR".to_string(),
254                vec![f.this],
255            )))),
256
257            // ============================================
258            // BOOLEAN AGGREGATES
259            // ============================================
260            // LogicalAnd -> BOOL_AND
261            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
262                "BOOL_AND".to_string(),
263                vec![f.this],
264            )))),
265
266            // LogicalOr -> BOOL_OR
267            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
268                "BOOL_OR".to_string(),
269                vec![f.this],
270            )))),
271
272            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
273            Expression::Xor(f) => {
274                if let (Some(a), Some(b)) = (f.this, f.expression) {
275                    Ok(Expression::Neq(Box::new(BinaryOp {
276                        left: *a,
277                        right: *b,
278                        left_comments: Vec::new(),
279                        operator_comments: Vec::new(),
280                        trailing_comments: Vec::new(),
281                        inferred_type: None,
282                    })))
283                } else {
284                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
285                }
286            }
287
288            // ============================================
289            // ARRAY OPERATORS
290            // ============================================
291            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
292            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
293            Expression::ArrayContainedBy(op) => {
294                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
295                    left: op.right,
296                    right: op.left,
297                    left_comments: Vec::new(),
298                    operator_comments: Vec::new(),
299                    trailing_comments: Vec::new(),
300                    inferred_type: None,
301                })))
302            }
303
304            // ============================================
305            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
306            // ============================================
307            // RegexpLike -> keep as-is, generator handles ~ operator output
308            Expression::RegexpLike(f) => {
309                // Generator will output as: expr ~ pattern
310                Ok(Expression::RegexpLike(f))
311            }
312
313            // ============================================
314            // DATE/TIME FUNCTIONS
315            // ============================================
316            // DateAdd -> date + INTERVAL in PostgreSQL
317            Expression::DateAdd(f) => {
318                let interval_expr = Expression::Interval(Box::new(Interval {
319                    this: Some(f.interval),
320                    unit: Some(IntervalUnitSpec::Simple {
321                        unit: f.unit,
322                        use_plural: false,
323                    }),
324                }));
325                Ok(Expression::Add(Box::new(BinaryOp {
326                    left: f.this,
327                    right: interval_expr,
328                    left_comments: Vec::new(),
329                    operator_comments: Vec::new(),
330                    trailing_comments: Vec::new(),
331                    inferred_type: None,
332                })))
333            }
334
335            // DateSub -> date - INTERVAL in PostgreSQL
336            Expression::DateSub(f) => {
337                let interval_expr = Expression::Interval(Box::new(Interval {
338                    this: Some(f.interval),
339                    unit: Some(IntervalUnitSpec::Simple {
340                        unit: f.unit,
341                        use_plural: false,
342                    }),
343                }));
344                Ok(Expression::Sub(Box::new(BinaryOp {
345                    left: f.this,
346                    right: interval_expr,
347                    left_comments: Vec::new(),
348                    operator_comments: Vec::new(),
349                    trailing_comments: Vec::new(),
350                    inferred_type: None,
351                })))
352            }
353
354            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
355            Expression::DateDiff(f) => {
356                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
357                // matching the 3-arg string-based DATEDIFF handler below
358                let unit = f.unit.unwrap_or(IntervalUnit::Day);
359
360                // Helper: CAST(expr AS TIMESTAMP)
361                let cast_ts = |e: Expression| -> Expression {
362                    Expression::Cast(Box::new(Cast {
363                        this: e,
364                        to: DataType::Timestamp {
365                            precision: None,
366                            timezone: false,
367                        },
368                        trailing_comments: Vec::new(),
369                        double_colon_syntax: false,
370                        format: None,
371                        default: None,
372                        inferred_type: None,
373                    }))
374                };
375
376                // Helper: CAST(expr AS BIGINT)
377                let cast_bigint = |e: Expression| -> Expression {
378                    Expression::Cast(Box::new(Cast {
379                        this: e,
380                        to: DataType::BigInt { length: None },
381                        trailing_comments: Vec::new(),
382                        double_colon_syntax: false,
383                        format: None,
384                        default: None,
385                        inferred_type: None,
386                    }))
387                };
388
389                // Clone end/start for reuse
390                let end_expr = f.this;
391                let start = f.expression;
392
393                // Helper: end_ts - start_ts
394                let ts_diff = || -> Expression {
395                    Expression::Sub(Box::new(BinaryOp::new(
396                        cast_ts(end_expr.clone()),
397                        cast_ts(start.clone()),
398                    )))
399                };
400
401                // Helper: AGE(end_ts, start_ts)
402                let age_call = || -> Expression {
403                    Expression::Function(Box::new(Function::new(
404                        "AGE".to_string(),
405                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
406                    )))
407                };
408
409                // Helper: EXTRACT(field FROM expr)
410                let extract = |field: DateTimeField, from: Expression| -> Expression {
411                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
412                };
413
414                // Helper: number literal
415                let num =
416                    |n: i64| -> Expression { Expression::Literal(Literal::Number(n.to_string())) };
417
418                let epoch_field = DateTimeField::Custom("epoch".to_string());
419
420                let result = match unit {
421                    IntervalUnit::Nanosecond => {
422                        let epoch = extract(epoch_field.clone(), ts_diff());
423                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
424                            epoch,
425                            num(1000000000),
426                        ))))
427                    }
428                    IntervalUnit::Microsecond => {
429                        let epoch = extract(epoch_field, ts_diff());
430                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
431                            epoch,
432                            num(1000000),
433                        ))))
434                    }
435                    IntervalUnit::Millisecond => {
436                        let epoch = extract(epoch_field, ts_diff());
437                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
438                    }
439                    IntervalUnit::Second => {
440                        let epoch = extract(epoch_field, ts_diff());
441                        cast_bigint(epoch)
442                    }
443                    IntervalUnit::Minute => {
444                        let epoch = extract(epoch_field, ts_diff());
445                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
446                    }
447                    IntervalUnit::Hour => {
448                        let epoch = extract(epoch_field, ts_diff());
449                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
450                    }
451                    IntervalUnit::Day => {
452                        let epoch = extract(epoch_field, ts_diff());
453                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
454                    }
455                    IntervalUnit::Week => {
456                        let diff_parens = Expression::Paren(Box::new(Paren {
457                            this: ts_diff(),
458                            trailing_comments: Vec::new(),
459                        }));
460                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
461                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
462                    }
463                    IntervalUnit::Month => {
464                        let year_part =
465                            extract(DateTimeField::Custom("year".to_string()), age_call());
466                        let month_part =
467                            extract(DateTimeField::Custom("month".to_string()), age_call());
468                        let year_months =
469                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
470                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
471                            year_months,
472                            month_part,
473                        ))))
474                    }
475                    IntervalUnit::Quarter => {
476                        let year_part =
477                            extract(DateTimeField::Custom("year".to_string()), age_call());
478                        let month_part =
479                            extract(DateTimeField::Custom("month".to_string()), age_call());
480                        let year_quarters =
481                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
482                        let month_quarters =
483                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
484                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
485                            year_quarters,
486                            month_quarters,
487                        ))))
488                    }
489                    IntervalUnit::Year => cast_bigint(extract(
490                        DateTimeField::Custom("year".to_string()),
491                        age_call(),
492                    )),
493                };
494                Ok(result)
495            }
496
497            // UnixToTime -> TO_TIMESTAMP
498            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
499                "TO_TIMESTAMP".to_string(),
500                vec![*f.this],
501            )))),
502
503            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
504            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
505                "DATE_PART".to_string(),
506                vec![Expression::string("epoch"), f.this],
507            )))),
508
509            // StrToTime -> TO_TIMESTAMP in PostgreSQL
510            Expression::ToTimestamp(f) => {
511                let mut args = vec![f.this];
512                if let Some(fmt) = f.format {
513                    args.push(fmt);
514                }
515                Ok(Expression::Function(Box::new(Function::new(
516                    "TO_TIMESTAMP".to_string(),
517                    args,
518                ))))
519            }
520
521            // StrToDate -> TO_DATE in PostgreSQL
522            Expression::ToDate(f) => {
523                let mut args = vec![f.this];
524                if let Some(fmt) = f.format {
525                    args.push(fmt);
526                }
527                Ok(Expression::Function(Box::new(Function::new(
528                    "TO_DATE".to_string(),
529                    args,
530                ))))
531            }
532
533            // TimestampTrunc -> DATE_TRUNC
534            Expression::TimestampTrunc(f) => {
535                // Convert DateTimeField to string expression for DATE_TRUNC
536                let unit_str = format!("{:?}", f.unit).to_lowercase();
537                let args = vec![Expression::string(&unit_str), f.this];
538                Ok(Expression::Function(Box::new(Function::new(
539                    "DATE_TRUNC".to_string(),
540                    args,
541                ))))
542            }
543
544            // TimeFromParts -> MAKE_TIME
545            Expression::TimeFromParts(f) => {
546                let mut args = Vec::new();
547                if let Some(h) = f.hour {
548                    args.push(*h);
549                }
550                if let Some(m) = f.min {
551                    args.push(*m);
552                }
553                if let Some(s) = f.sec {
554                    args.push(*s);
555                }
556                Ok(Expression::Function(Box::new(Function::new(
557                    "MAKE_TIME".to_string(),
558                    args,
559                ))))
560            }
561
562            // TimestampFromParts -> MAKE_TIMESTAMP
563            Expression::MakeTimestamp(f) => {
564                // MakeTimestampFunc has direct Expression fields, not Options
565                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
566                Ok(Expression::Function(Box::new(Function::new(
567                    "MAKE_TIMESTAMP".to_string(),
568                    args,
569                ))))
570            }
571
572            // ============================================
573            // STRING FUNCTIONS
574            // ============================================
575            // StringAgg is native to PostgreSQL - keep as-is
576            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
577
578            // GroupConcat -> STRING_AGG in PostgreSQL
579            Expression::GroupConcat(f) => {
580                let mut args = vec![f.this.clone()];
581                if let Some(sep) = f.separator.clone() {
582                    args.push(sep);
583                } else {
584                    args.push(Expression::string(","));
585                }
586                Ok(Expression::Function(Box::new(Function::new(
587                    "STRING_AGG".to_string(),
588                    args,
589                ))))
590            }
591
592            // StrPosition -> POSITION function
593            Expression::Position(f) => {
594                // PostgreSQL: POSITION(substring IN string)
595                // Keep as Position, generator handles it
596                Ok(Expression::Position(f))
597            }
598
599            // ============================================
600            // AGGREGATE FUNCTIONS
601            // ============================================
602            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
603            Expression::CountIf(f) => {
604                let case_expr = Expression::Case(Box::new(Case {
605                    operand: None,
606                    whens: vec![(f.this.clone(), Expression::number(1))],
607                    else_: Some(Expression::number(0)),
608                    comments: Vec::new(),
609                    inferred_type: None,
610                }));
611                Ok(Expression::Sum(Box::new(AggFunc {
612                    ignore_nulls: None,
613                    having_max: None,
614                    this: case_expr,
615                    distinct: f.distinct,
616                    filter: f.filter,
617                    order_by: Vec::new(),
618                    name: None,
619                    limit: None,
620                    inferred_type: None,
621                })))
622            }
623
624            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
625            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
626
627            // Variance -> VAR_SAMP in PostgreSQL
628            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
629                "VAR_SAMP".to_string(),
630                vec![f.this],
631            )))),
632
633            // VarPop -> VAR_POP in PostgreSQL
634            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
635                "VAR_POP".to_string(),
636                vec![f.this],
637            )))),
638
639            // ============================================
640            // JSON FUNCTIONS
641            // ============================================
642            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
643            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
644            Expression::JsonExtract(mut f) => {
645                // Only use arrow syntax for simple literal paths (string or non-negative number)
646                // Complex expressions like column references should use function form
647                f.arrow_syntax = Self::is_simple_json_path(&f.path);
648                Ok(Expression::JsonExtract(f))
649            }
650
651            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
652            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
653            // #>> (hash_arrow_syntax) stays as #>>
654            Expression::JsonExtractScalar(mut f) => {
655                if !f.hash_arrow_syntax {
656                    // Only use arrow syntax for simple literal paths (string or non-negative number)
657                    // Complex expressions like Neg(-1) should use function form
658                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
659                }
660                Ok(Expression::JsonExtractScalar(f))
661            }
662
663            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
664
665            // JSONObjectAgg -> JSON_OBJECT_AGG
666            Expression::JsonObjectAgg(f) => {
667                // JsonObjectAggFunc has key and value as Expression, not Option
668                let args = vec![f.key, f.value];
669                Ok(Expression::Function(Box::new(Function::new(
670                    "JSON_OBJECT_AGG".to_string(),
671                    args,
672                ))))
673            }
674
675            // JSONArrayAgg -> JSON_AGG
676            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
677                "JSON_AGG".to_string(),
678                vec![f.this],
679            )))),
680
681            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
682            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Literal::String(String::new()))),
683
684            // ============================================
685            // MISC FUNCTIONS
686            // ============================================
687            // IntDiv -> DIV in PostgreSQL
688            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
689                "DIV".to_string(),
690                vec![f.this, f.expression],
691            )))),
692
693            // Unicode -> ASCII in PostgreSQL
694            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
695                "ASCII".to_string(),
696                vec![f.this],
697            )))),
698
699            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
700            Expression::LastDay(f) => {
701                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
702                let truncated = Expression::Function(Box::new(Function::new(
703                    "DATE_TRUNC".to_string(),
704                    vec![Expression::string("month"), f.this.clone()],
705                )));
706                let plus_month = Expression::Add(Box::new(BinaryOp {
707                    left: truncated,
708                    right: Expression::Interval(Box::new(Interval {
709                        this: Some(Expression::string("1")),
710                        unit: Some(IntervalUnitSpec::Simple {
711                            unit: IntervalUnit::Month,
712                            use_plural: false,
713                        }),
714                    })),
715                    left_comments: Vec::new(),
716                    operator_comments: Vec::new(),
717                    trailing_comments: Vec::new(),
718                    inferred_type: None,
719                }));
720                let minus_day = Expression::Sub(Box::new(BinaryOp {
721                    left: plus_month,
722                    right: Expression::Interval(Box::new(Interval {
723                        this: Some(Expression::string("1")),
724                        unit: Some(IntervalUnitSpec::Simple {
725                            unit: IntervalUnit::Day,
726                            use_plural: false,
727                        }),
728                    })),
729                    left_comments: Vec::new(),
730                    operator_comments: Vec::new(),
731                    trailing_comments: Vec::new(),
732                    inferred_type: None,
733                }));
734                Ok(Expression::Cast(Box::new(Cast {
735                    this: minus_day,
736                    to: DataType::Date,
737                    trailing_comments: Vec::new(),
738                    double_colon_syntax: true, // Use PostgreSQL :: syntax
739                    format: None,
740                    default: None,
741                    inferred_type: None,
742                })))
743            }
744
745            // GenerateSeries is native to PostgreSQL
746            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
747
748            // ExplodingGenerateSeries -> GENERATE_SERIES
749            Expression::ExplodingGenerateSeries(f) => {
750                let mut args = vec![f.start, f.stop];
751                if let Some(step) = f.step {
752                    args.push(step); // step is Expression, not Box<Expression>
753                }
754                Ok(Expression::Function(Box::new(Function::new(
755                    "GENERATE_SERIES".to_string(),
756                    args,
757                ))))
758            }
759
760            // ============================================
761            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
762            // ============================================
763            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
764            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
765                name: "CURRENT_TIMESTAMP".to_string(),
766                args: vec![],
767                distinct: false,
768                trailing_comments: vec![],
769                use_bracket_syntax: false,
770                no_parens: true,
771                quoted: false,
772                span: None,
773                inferred_type: None,
774            }))),
775
776            // CurrentUser -> CURRENT_USER (no parens)
777            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
778                "CURRENT_USER".to_string(),
779                vec![],
780            )))),
781
782            // CurrentDate -> CURRENT_DATE (no parens)
783            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
784                name: "CURRENT_DATE".to_string(),
785                args: vec![],
786                distinct: false,
787                trailing_comments: vec![],
788                use_bracket_syntax: false,
789                no_parens: true,
790                quoted: false,
791                span: None,
792                inferred_type: None,
793            }))),
794
795            // ============================================
796            // JOIN TRANSFORMATIONS
797            // ============================================
798            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
799            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
800                Ok(Expression::Join(Box::new(Join {
801                    this: join.this,
802                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
803                    using: join.using,
804                    kind: JoinKind::CrossApply,
805                    use_inner_keyword: false,
806                    use_outer_keyword: false,
807                    deferred_condition: false,
808                    join_hint: None,
809                    match_condition: None,
810                    pivots: join.pivots,
811                    comments: join.comments,
812                    nesting_group: 0,
813                    directed: false,
814                })))
815            }
816
817            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
818            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
819                Ok(Expression::Join(Box::new(Join {
820                    this: join.this,
821                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
822                    using: join.using,
823                    kind: JoinKind::OuterApply,
824                    use_inner_keyword: false,
825                    use_outer_keyword: false,
826                    deferred_condition: false,
827                    join_hint: None,
828                    match_condition: None,
829                    pivots: join.pivots,
830                    comments: join.comments,
831                    nesting_group: 0,
832                    directed: false,
833                })))
834            }
835
836            // ============================================
837            // GENERIC FUNCTION TRANSFORMATIONS
838            // ============================================
839            Expression::Function(f) => self.transform_function(*f),
840
841            // Generic aggregate function transformations
842            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
843
844            // ===== Context-aware JSON arrow wrapping =====
845            // When JSON arrow expressions appear in Binary/In/Not contexts,
846            // they need to be wrapped in parentheses for correct precedence.
847            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
848
849            // Binary operators that need JSON wrapping
850            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
851                left: wrap_if_json_arrow(op.left),
852                right: wrap_if_json_arrow(op.right),
853                ..*op
854            }))),
855            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
856                left: wrap_if_json_arrow(op.left),
857                right: wrap_if_json_arrow(op.right),
858                ..*op
859            }))),
860            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
861                left: wrap_if_json_arrow(op.left),
862                right: wrap_if_json_arrow(op.right),
863                ..*op
864            }))),
865            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
866                left: wrap_if_json_arrow(op.left),
867                right: wrap_if_json_arrow(op.right),
868                ..*op
869            }))),
870            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
871                left: wrap_if_json_arrow(op.left),
872                right: wrap_if_json_arrow(op.right),
873                ..*op
874            }))),
875            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
876                left: wrap_if_json_arrow(op.left),
877                right: wrap_if_json_arrow(op.right),
878                ..*op
879            }))),
880
881            // In expression - wrap the this part if it's JSON arrow
882            Expression::In(mut i) => {
883                i.this = wrap_if_json_arrow(i.this);
884                Ok(Expression::In(i))
885            }
886
887            // Not expression - wrap the this part if it's JSON arrow
888            Expression::Not(mut n) => {
889                n.this = wrap_if_json_arrow(n.this);
890                Ok(Expression::Not(n))
891            }
892
893            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
894            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
895            Expression::Merge(m) => Ok(Expression::Merge(m)),
896
897            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
898            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
899                // Convert path from bracketed format to simple key
900                // e.g., '["fr''uit"]' -> 'fr''uit'
901                let path = match *je.expression {
902                    Expression::Literal(Literal::String(s)) => {
903                        // Strip bracketed JSON path format: ["key"] -> key
904                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
905                            s[2..s.len() - 2].to_string()
906                        } else {
907                            s
908                        };
909                        Expression::Literal(Literal::String(cleaned))
910                    }
911                    other => other,
912                };
913                Ok(Expression::Function(Box::new(Function::new(
914                    "JSON_EXTRACT_PATH".to_string(),
915                    vec![*je.this, path],
916                ))))
917            }
918
919            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
920            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
921                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
922                    this: t.this,
923                    characters: t.characters,
924                    position: t.position,
925                    sql_standard_syntax: true,
926                    position_explicit: t.position_explicit,
927                })))
928            }
929
930            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
931            Expression::Literal(Literal::ByteString(s)) => Ok(Expression::Cast(Box::new(Cast {
932                this: Expression::Literal(Literal::EscapeString(s)),
933                to: DataType::VarBinary { length: None },
934                trailing_comments: Vec::new(),
935                double_colon_syntax: false,
936                format: None,
937                default: None,
938                inferred_type: None,
939            }))),
940
941            // Pass through everything else
942            _ => Ok(expr),
943        }
944    }
945}
946
947impl PostgresDialect {
948    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
949    /// Simple paths can use arrow syntax (->>) in PostgreSQL
950    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
951    fn is_simple_json_path(path: &Expression) -> bool {
952        match path {
953            // String literals are always simple
954            Expression::Literal(Literal::String(_)) => true,
955            // Non-negative integer literals are simple
956            Expression::Literal(Literal::Number(n)) => {
957                // Check if it's non-negative
958                !n.starts_with('-')
959            }
960            // JSONPath expressions are simple (they're already parsed paths)
961            Expression::JSONPath(_) => true,
962            // Everything else (Neg, function calls, etc.) is complex
963            _ => false,
964        }
965    }
966
967    /// Transform data types according to PostgreSQL TYPE_MAPPING
968    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
969        let transformed = match dt {
970            // TINYINT -> SMALLINT
971            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
972
973            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
974            DataType::Float { .. } => DataType::Custom {
975                name: "DOUBLE PRECISION".to_string(),
976            },
977
978            // DOUBLE -> DOUBLE PRECISION
979            DataType::Double { .. } => DataType::Custom {
980                name: "DOUBLE PRECISION".to_string(),
981            },
982
983            // BINARY -> BYTEA (handled by generator which preserves length)
984            DataType::Binary { .. } => dt,
985
986            // VARBINARY -> BYTEA (handled by generator which preserves length)
987            DataType::VarBinary { .. } => dt,
988
989            // BLOB -> BYTEA
990            DataType::Blob => DataType::Custom {
991                name: "BYTEA".to_string(),
992            },
993
994            // Custom type normalizations
995            DataType::Custom { ref name } => {
996                let upper = name.to_uppercase();
997                match upper.as_str() {
998                    // INT8 -> BIGINT (PostgreSQL alias)
999                    "INT8" => DataType::BigInt { length: None },
1000                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1001                    "FLOAT8" => DataType::Custom {
1002                        name: "DOUBLE PRECISION".to_string(),
1003                    },
1004                    // FLOAT4 -> REAL (PostgreSQL alias)
1005                    "FLOAT4" => DataType::Custom {
1006                        name: "REAL".to_string(),
1007                    },
1008                    // INT4 -> INTEGER (PostgreSQL alias)
1009                    "INT4" => DataType::Int {
1010                        length: None,
1011                        integer_spelling: false,
1012                    },
1013                    // INT2 -> SMALLINT (PostgreSQL alias)
1014                    "INT2" => DataType::SmallInt { length: None },
1015                    _ => dt,
1016                }
1017            }
1018
1019            // Keep all other types as-is
1020            other => other,
1021        };
1022        Ok(Expression::DataType(transformed))
1023    }
1024
1025    fn transform_function(&self, f: Function) -> Result<Expression> {
1026        let name_upper = f.name.to_uppercase();
1027        match name_upper.as_str() {
1028            // IFNULL -> COALESCE
1029            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1030                original_name: None,
1031                expressions: f.args,
1032                inferred_type: None,
1033            }))),
1034
1035            // NVL -> COALESCE
1036            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1037                original_name: None,
1038                expressions: f.args,
1039                inferred_type: None,
1040            }))),
1041
1042            // ISNULL (SQL Server) -> COALESCE
1043            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1044                original_name: None,
1045                expressions: f.args,
1046                inferred_type: None,
1047            }))),
1048
1049            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1050            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1051                Function::new("STRING_AGG".to_string(), f.args),
1052            ))),
1053
1054            // SUBSTR -> SUBSTRING (standard SQL)
1055            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1056                "SUBSTRING".to_string(),
1057                f.args,
1058            )))),
1059
1060            // RAND -> RANDOM in PostgreSQL
1061            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1062
1063            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1064            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1065                this: f.args.into_iter().next().unwrap(),
1066                decimals: None,
1067                to: None,
1068            }))),
1069
1070            // LEN -> LENGTH in PostgreSQL
1071            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1072                this: f.args.into_iter().next().unwrap(),
1073                original_name: None,
1074                inferred_type: None,
1075            }))),
1076
1077            // CHAR_LENGTH -> LENGTH in PostgreSQL
1078            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1079                this: f.args.into_iter().next().unwrap(),
1080                original_name: None,
1081                inferred_type: None,
1082            }))),
1083
1084            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1085            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1086                Ok(Expression::Length(Box::new(UnaryFunc {
1087                    this: f.args.into_iter().next().unwrap(),
1088                    original_name: None,
1089                    inferred_type: None,
1090                })))
1091            }
1092
1093            // CHARINDEX -> POSITION in PostgreSQL
1094            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1095            "CHARINDEX" if f.args.len() >= 2 => {
1096                let mut args = f.args;
1097                let substring = args.remove(0);
1098                let string = args.remove(0);
1099                Ok(Expression::Position(Box::new(
1100                    crate::expressions::PositionFunc {
1101                        substring,
1102                        string,
1103                        start: args.pop(),
1104                    },
1105                )))
1106            }
1107
1108            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1109            "GETDATE" => Ok(Expression::CurrentTimestamp(
1110                crate::expressions::CurrentTimestamp {
1111                    precision: None,
1112                    sysdate: false,
1113                },
1114            )),
1115
1116            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1117            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1118                crate::expressions::CurrentTimestamp {
1119                    precision: None,
1120                    sysdate: false,
1121                },
1122            )),
1123
1124            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1125            "NOW" => Ok(Expression::CurrentTimestamp(
1126                crate::expressions::CurrentTimestamp {
1127                    precision: None,
1128                    sysdate: false,
1129                },
1130            )),
1131
1132            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1133            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1134                "GEN_RANDOM_UUID".to_string(),
1135                vec![],
1136            )))),
1137
1138            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1139            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1140                "GEN_RANDOM_UUID".to_string(),
1141                vec![],
1142            )))),
1143
1144            // UNNEST is native to PostgreSQL
1145            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1146
1147            // GENERATE_SERIES is native to PostgreSQL
1148            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1149
1150            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1151            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1152                "SHA256".to_string(),
1153                f.args,
1154            )))),
1155
1156            // SHA2 -> SHA256/SHA512 based on length argument
1157            "SHA2" if f.args.len() == 2 => {
1158                // SHA2(data, length) -> SHA256/SHA384/SHA512
1159                let args = f.args;
1160                let data = args[0].clone();
1161                // Default to SHA256 - would need runtime inspection for exact mapping
1162                Ok(Expression::Function(Box::new(Function::new(
1163                    "SHA256".to_string(),
1164                    vec![data],
1165                ))))
1166            }
1167
1168            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1169            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1170
1171            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1172            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1173                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1174            ))),
1175            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1176                Function::new("LEVENSHTEIN".to_string(), f.args),
1177            ))),
1178
1179            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1180            "TRIM" if f.args.len() == 2 => {
1181                let value = f.args[0].clone();
1182                let chars = f.args[1].clone();
1183                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1184                    this: value,
1185                    characters: Some(chars),
1186                    position: crate::expressions::TrimPosition::Both,
1187                    sql_standard_syntax: true,
1188                    position_explicit: false,
1189                })))
1190            }
1191
1192            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1193            "DATEDIFF" if f.args.len() >= 2 => {
1194                let mut args = f.args;
1195                if args.len() == 2 {
1196                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1197                    let first = args.remove(0);
1198                    let second = args.remove(0);
1199                    Ok(Expression::Function(Box::new(Function::new(
1200                        "AGE".to_string(),
1201                        vec![first, second],
1202                    ))))
1203                } else {
1204                    // 3-arg form: DATEDIFF(unit, start, end)
1205                    let unit_expr = args.remove(0);
1206                    let start = args.remove(0);
1207                    let end_expr = args.remove(0);
1208
1209                    // Extract unit name from identifier or column
1210                    let unit_name = match &unit_expr {
1211                        Expression::Identifier(id) => id.name.to_uppercase(),
1212                        Expression::Column(col) if col.table.is_none() => {
1213                            col.name.name.to_uppercase()
1214                        }
1215                        _ => "DAY".to_string(),
1216                    };
1217
1218                    // Helper: CAST(expr AS TIMESTAMP)
1219                    let cast_ts = |e: Expression| -> Expression {
1220                        Expression::Cast(Box::new(Cast {
1221                            this: e,
1222                            to: DataType::Timestamp {
1223                                precision: None,
1224                                timezone: false,
1225                            },
1226                            trailing_comments: Vec::new(),
1227                            double_colon_syntax: false,
1228                            format: None,
1229                            default: None,
1230                            inferred_type: None,
1231                        }))
1232                    };
1233
1234                    // Helper: CAST(expr AS BIGINT)
1235                    let cast_bigint = |e: Expression| -> Expression {
1236                        Expression::Cast(Box::new(Cast {
1237                            this: e,
1238                            to: DataType::BigInt { length: None },
1239                            trailing_comments: Vec::new(),
1240                            double_colon_syntax: false,
1241                            format: None,
1242                            default: None,
1243                            inferred_type: None,
1244                        }))
1245                    };
1246
1247                    let end_ts = cast_ts(end_expr.clone());
1248                    let start_ts = cast_ts(start.clone());
1249
1250                    // Helper: end_ts - start_ts
1251                    let ts_diff = || -> Expression {
1252                        Expression::Sub(Box::new(BinaryOp::new(
1253                            cast_ts(end_expr.clone()),
1254                            cast_ts(start.clone()),
1255                        )))
1256                    };
1257
1258                    // Helper: AGE(end_ts, start_ts)
1259                    let age_call = || -> Expression {
1260                        Expression::Function(Box::new(Function::new(
1261                            "AGE".to_string(),
1262                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1263                        )))
1264                    };
1265
1266                    // Helper: EXTRACT(field FROM expr)
1267                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1268                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1269                    };
1270
1271                    // Helper: number literal
1272                    let num = |n: i64| -> Expression {
1273                        Expression::Literal(Literal::Number(n.to_string()))
1274                    };
1275
1276                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1277                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1278
1279                    let result = match unit_name.as_str() {
1280                        "MICROSECOND" => {
1281                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1282                            let epoch = extract(epoch_field, ts_diff());
1283                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1284                                epoch,
1285                                num(1000000),
1286                            ))))
1287                        }
1288                        "MILLISECOND" => {
1289                            let epoch = extract(epoch_field, ts_diff());
1290                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1291                        }
1292                        "SECOND" => {
1293                            let epoch = extract(epoch_field, ts_diff());
1294                            cast_bigint(epoch)
1295                        }
1296                        "MINUTE" => {
1297                            let epoch = extract(epoch_field, ts_diff());
1298                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1299                        }
1300                        "HOUR" => {
1301                            let epoch = extract(epoch_field, ts_diff());
1302                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1303                        }
1304                        "DAY" => {
1305                            let epoch = extract(epoch_field, ts_diff());
1306                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1307                        }
1308                        "WEEK" => {
1309                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1310                            let diff_parens = Expression::Paren(Box::new(Paren {
1311                                this: ts_diff(),
1312                                trailing_comments: Vec::new(),
1313                            }));
1314                            let days =
1315                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1316                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1317                        }
1318                        "MONTH" => {
1319                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1320                            let year_part =
1321                                extract(DateTimeField::Custom("year".to_string()), age_call());
1322                            let month_part =
1323                                extract(DateTimeField::Custom("month".to_string()), age_call());
1324                            let year_months =
1325                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1326                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1327                                year_months,
1328                                month_part,
1329                            ))))
1330                        }
1331                        "QUARTER" => {
1332                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1333                            let year_part =
1334                                extract(DateTimeField::Custom("year".to_string()), age_call());
1335                            let month_part =
1336                                extract(DateTimeField::Custom("month".to_string()), age_call());
1337                            let year_quarters =
1338                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1339                            let month_quarters =
1340                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1341                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1342                                year_quarters,
1343                                month_quarters,
1344                            ))))
1345                        }
1346                        "YEAR" => {
1347                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1348                            cast_bigint(extract(
1349                                DateTimeField::Custom("year".to_string()),
1350                                age_call(),
1351                            ))
1352                        }
1353                        _ => {
1354                            // Fallback: simple AGE
1355                            Expression::Function(Box::new(Function::new(
1356                                "AGE".to_string(),
1357                                vec![end_ts, start_ts],
1358                            )))
1359                        }
1360                    };
1361                    Ok(result)
1362                }
1363            }
1364
1365            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1366            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1367                let mut args = f.args;
1368                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1369                let start = args.remove(0);
1370                let end = args.remove(0);
1371                Ok(Expression::Function(Box::new(Function::new(
1372                    "AGE".to_string(),
1373                    vec![end, start],
1374                ))))
1375            }
1376
1377            // FROM_UNIXTIME -> TO_TIMESTAMP
1378            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1379                "TO_TIMESTAMP".to_string(),
1380                f.args,
1381            )))),
1382
1383            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1384            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1385                let arg = f.args.into_iter().next().unwrap();
1386                Ok(Expression::Function(Box::new(Function::new(
1387                    "DATE_PART".to_string(),
1388                    vec![Expression::string("epoch"), arg],
1389                ))))
1390            }
1391
1392            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1393            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1394                Ok(Expression::Function(Box::new(Function::new(
1395                    "DATE_PART".to_string(),
1396                    vec![
1397                        Expression::string("epoch"),
1398                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1399                            precision: None,
1400                            sysdate: false,
1401                        }),
1402                    ],
1403                ))))
1404            }
1405
1406            // DATEADD -> date + interval pattern
1407            "DATEADD" if f.args.len() == 3 => {
1408                // DATEADD(unit, count, date) -> date + interval 'count unit'
1409                // This is a simplified version - full impl would construct proper interval
1410                let mut args = f.args;
1411                let _unit = args.remove(0);
1412                let count = args.remove(0);
1413                let date = args.remove(0);
1414                Ok(Expression::Add(Box::new(BinaryOp {
1415                    left: date,
1416                    right: count,
1417                    left_comments: Vec::new(),
1418                    operator_comments: Vec::new(),
1419                    trailing_comments: Vec::new(),
1420                    inferred_type: None,
1421                })))
1422            }
1423
1424            // INSTR -> POSITION (simplified)
1425            "INSTR" if f.args.len() >= 2 => {
1426                let mut args = f.args;
1427                let string = args.remove(0);
1428                let substring = args.remove(0);
1429                Ok(Expression::Position(Box::new(
1430                    crate::expressions::PositionFunc {
1431                        substring,
1432                        string,
1433                        start: args.pop(),
1434                    },
1435                )))
1436            }
1437
1438            // CONCAT_WS is native to PostgreSQL
1439            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1440
1441            // REGEXP_REPLACE: pass through without adding 'g' flag
1442            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1443            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1444                Ok(Expression::Function(Box::new(f)))
1445            }
1446            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1447            // If occurrence is 0 (global), append 'g' to flags
1448            "REGEXP_REPLACE" if f.args.len() == 6 => {
1449                let is_global = match &f.args[4] {
1450                    Expression::Literal(crate::expressions::Literal::Number(n)) => n == "0",
1451                    _ => false,
1452                };
1453                if is_global {
1454                    let subject = f.args[0].clone();
1455                    let pattern = f.args[1].clone();
1456                    let replacement = f.args[2].clone();
1457                    let position = f.args[3].clone();
1458                    let occurrence = f.args[4].clone();
1459                    let params = &f.args[5];
1460                    let mut flags =
1461                        if let Expression::Literal(crate::expressions::Literal::String(s)) = params
1462                        {
1463                            s.clone()
1464                        } else {
1465                            String::new()
1466                        };
1467                    if !flags.contains('g') {
1468                        flags.push('g');
1469                    }
1470                    Ok(Expression::Function(Box::new(Function::new(
1471                        "REGEXP_REPLACE".to_string(),
1472                        vec![
1473                            subject,
1474                            pattern,
1475                            replacement,
1476                            position,
1477                            occurrence,
1478                            Expression::Literal(crate::expressions::Literal::String(flags)),
1479                        ],
1480                    ))))
1481                } else {
1482                    Ok(Expression::Function(Box::new(f)))
1483                }
1484            }
1485            // Default: pass through
1486            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1487
1488            // Pass through everything else
1489            _ => Ok(Expression::Function(Box::new(f))),
1490        }
1491    }
1492
1493    fn transform_aggregate_function(
1494        &self,
1495        f: Box<crate::expressions::AggregateFunction>,
1496    ) -> Result<Expression> {
1497        let name_upper = f.name.to_uppercase();
1498        match name_upper.as_str() {
1499            // COUNT_IF -> SUM(CASE WHEN...)
1500            "COUNT_IF" if !f.args.is_empty() => {
1501                let condition = f.args.into_iter().next().unwrap();
1502                let case_expr = Expression::Case(Box::new(Case {
1503                    operand: None,
1504                    whens: vec![(condition, Expression::number(1))],
1505                    else_: Some(Expression::number(0)),
1506                    comments: Vec::new(),
1507                    inferred_type: None,
1508                }));
1509                Ok(Expression::Sum(Box::new(AggFunc {
1510                    ignore_nulls: None,
1511                    having_max: None,
1512                    this: case_expr,
1513                    distinct: f.distinct,
1514                    filter: f.filter,
1515                    order_by: Vec::new(),
1516                    name: None,
1517                    limit: None,
1518                    inferred_type: None,
1519                })))
1520            }
1521
1522            // GROUP_CONCAT -> STRING_AGG
1523            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1524                Function::new("STRING_AGG".to_string(), f.args),
1525            ))),
1526
1527            // STDEV -> STDDEV in PostgreSQL
1528            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1529                ignore_nulls: None,
1530                having_max: None,
1531                this: f.args.into_iter().next().unwrap(),
1532                distinct: f.distinct,
1533                filter: f.filter,
1534                order_by: Vec::new(),
1535                name: None,
1536                limit: None,
1537                inferred_type: None,
1538            }))),
1539
1540            // STDEVP -> STDDEV_POP in PostgreSQL
1541            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1542                ignore_nulls: None,
1543                having_max: None,
1544                this: f.args.into_iter().next().unwrap(),
1545                distinct: f.distinct,
1546                filter: f.filter,
1547                order_by: Vec::new(),
1548                name: None,
1549                limit: None,
1550                inferred_type: None,
1551            }))),
1552
1553            // VAR -> VAR_SAMP in PostgreSQL
1554            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1555                ignore_nulls: None,
1556                having_max: None,
1557                this: f.args.into_iter().next().unwrap(),
1558                distinct: f.distinct,
1559                filter: f.filter,
1560                order_by: Vec::new(),
1561                name: None,
1562                limit: None,
1563                inferred_type: None,
1564            }))),
1565
1566            // VARP -> VAR_POP in PostgreSQL
1567            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1568                ignore_nulls: None,
1569                having_max: None,
1570                this: f.args.into_iter().next().unwrap(),
1571                distinct: f.distinct,
1572                filter: f.filter,
1573                order_by: Vec::new(),
1574                name: None,
1575                limit: None,
1576                inferred_type: None,
1577            }))),
1578
1579            // BIT_AND is native to PostgreSQL
1580            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1581
1582            // BIT_OR is native to PostgreSQL
1583            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1584
1585            // BIT_XOR is native to PostgreSQL
1586            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1587
1588            // BOOL_AND is native to PostgreSQL
1589            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1590
1591            // BOOL_OR is native to PostgreSQL
1592            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1593
1594            // VARIANCE -> VAR_SAMP in PostgreSQL
1595            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1596                ignore_nulls: None,
1597                having_max: None,
1598                this: f.args.into_iter().next().unwrap(),
1599                distinct: f.distinct,
1600                filter: f.filter,
1601                order_by: Vec::new(),
1602                name: None,
1603                limit: None,
1604                inferred_type: None,
1605            }))),
1606
1607            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1608            "LOGICAL_OR" if !f.args.is_empty() => {
1609                let mut new_agg = f.clone();
1610                new_agg.name = "BOOL_OR".to_string();
1611                Ok(Expression::AggregateFunction(new_agg))
1612            }
1613
1614            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1615            "LOGICAL_AND" if !f.args.is_empty() => {
1616                let mut new_agg = f.clone();
1617                new_agg.name = "BOOL_AND".to_string();
1618                Ok(Expression::AggregateFunction(new_agg))
1619            }
1620
1621            // Pass through everything else
1622            _ => Ok(Expression::AggregateFunction(f)),
1623        }
1624    }
1625}
1626
1627#[cfg(test)]
1628mod tests {
1629    use super::*;
1630    use crate::dialects::Dialect;
1631
1632    fn transpile_to_postgres(sql: &str) -> String {
1633        let dialect = Dialect::get(DialectType::Generic);
1634        let result = dialect
1635            .transpile_to(sql, DialectType::PostgreSQL)
1636            .expect("Transpile failed");
1637        result[0].clone()
1638    }
1639
1640    #[test]
1641    fn test_ifnull_to_coalesce() {
1642        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1643        assert!(
1644            result.contains("COALESCE"),
1645            "Expected COALESCE, got: {}",
1646            result
1647        );
1648    }
1649
1650    #[test]
1651    fn test_nvl_to_coalesce() {
1652        let result = transpile_to_postgres("SELECT NVL(a, b)");
1653        assert!(
1654            result.contains("COALESCE"),
1655            "Expected COALESCE, got: {}",
1656            result
1657        );
1658    }
1659
1660    #[test]
1661    fn test_rand_to_random() {
1662        let result = transpile_to_postgres("SELECT RAND()");
1663        assert!(
1664            result.contains("RANDOM"),
1665            "Expected RANDOM, got: {}",
1666            result
1667        );
1668    }
1669
1670    #[test]
1671    fn test_basic_select() {
1672        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1673        assert!(result.contains("SELECT"));
1674        assert!(result.contains("FROM users"));
1675    }
1676
1677    #[test]
1678    fn test_len_to_length() {
1679        let result = transpile_to_postgres("SELECT LEN(name)");
1680        assert!(
1681            result.contains("LENGTH"),
1682            "Expected LENGTH, got: {}",
1683            result
1684        );
1685    }
1686
1687    #[test]
1688    fn test_getdate_to_current_timestamp() {
1689        let result = transpile_to_postgres("SELECT GETDATE()");
1690        assert!(
1691            result.contains("CURRENT_TIMESTAMP"),
1692            "Expected CURRENT_TIMESTAMP, got: {}",
1693            result
1694        );
1695    }
1696
1697    #[test]
1698    fn test_substr_to_substring() {
1699        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1700        assert!(
1701            result.contains("SUBSTRING"),
1702            "Expected SUBSTRING, got: {}",
1703            result
1704        );
1705    }
1706
1707    #[test]
1708    fn test_group_concat_to_string_agg() {
1709        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1710        assert!(
1711            result.contains("STRING_AGG"),
1712            "Expected STRING_AGG, got: {}",
1713            result
1714        );
1715    }
1716
1717    #[test]
1718    fn test_double_quote_identifiers() {
1719        // PostgreSQL uses double quotes for identifiers
1720        let dialect = PostgresDialect;
1721        let config = dialect.generator_config();
1722        assert_eq!(config.identifier_quote, '"');
1723    }
1724
1725    #[test]
1726    fn test_char_length_to_length() {
1727        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1728        assert!(
1729            result.contains("LENGTH"),
1730            "Expected LENGTH, got: {}",
1731            result
1732        );
1733    }
1734
1735    #[test]
1736    fn test_character_length_to_length() {
1737        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1738        assert!(
1739            result.contains("LENGTH"),
1740            "Expected LENGTH, got: {}",
1741            result
1742        );
1743    }
1744
1745    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1746    fn identity_postgres(sql: &str) -> String {
1747        let dialect = Dialect::get(DialectType::PostgreSQL);
1748        let exprs = dialect.parse(sql).expect("Parse failed");
1749        let transformed = dialect
1750            .transform(exprs[0].clone())
1751            .expect("Transform failed");
1752        dialect.generate(&transformed).expect("Generate failed")
1753    }
1754
1755    #[test]
1756    fn test_json_extract_with_column_path() {
1757        // When the path is a column reference (not a literal), should use function form
1758        let result = identity_postgres("json_data.data -> field_ids.field_id");
1759        assert!(
1760            result.contains("JSON_EXTRACT_PATH"),
1761            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1762            result
1763        );
1764    }
1765
1766    #[test]
1767    fn test_json_extract_scalar_with_negative_index() {
1768        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1769        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1770        assert!(
1771            result.contains("JSON_EXTRACT_PATH_TEXT"),
1772            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1773            result
1774        );
1775        // The first -> should still be arrow syntax since 'duration' is a string literal
1776        assert!(
1777            result.contains("->"),
1778            "Expected -> for string literal path, got: {}",
1779            result
1780        );
1781    }
1782
1783    #[test]
1784    fn test_json_extract_with_string_literal() {
1785        // When the path is a string literal, should keep arrow syntax
1786        let result = identity_postgres("data -> 'key'");
1787        assert!(
1788            result.contains("->"),
1789            "Expected -> for string literal path, got: {}",
1790            result
1791        );
1792        assert!(
1793            !result.contains("JSON_EXTRACT_PATH"),
1794            "Should NOT use function form for string literal, got: {}",
1795            result
1796        );
1797    }
1798}