Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145                inferred_type: None,
146            }))),
147
148            // NVL -> COALESCE in PostgreSQL
149            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
150                original_name: None,
151                expressions: vec![f.this, f.expression],
152                inferred_type: None,
153            }))),
154
155            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
156            // so it outputs as COALESCE instead of the original function name
157            Expression::Coalesce(mut f) => {
158                f.original_name = None;
159                Ok(Expression::Coalesce(f))
160            }
161
162            // ============================================
163            // CAST OPERATIONS
164            // ============================================
165            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
166            Expression::TryCast(c) => Ok(Expression::Cast(c)),
167
168            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
169            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
170
171            // ============================================
172            // RANDOM
173            // ============================================
174            // RAND -> RANDOM in PostgreSQL
175            Expression::Rand(r) => {
176                // PostgreSQL's RANDOM() doesn't take a seed argument
177                let _ = r.seed; // Ignore seed
178                Ok(Expression::Random(crate::expressions::Random))
179            }
180
181            // ============================================
182            // UUID
183            // ============================================
184            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
185            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
186                "GEN_RANDOM_UUID".to_string(),
187                vec![],
188            )))),
189
190            // ============================================
191            // ARRAY OPERATIONS
192            // ============================================
193            // EXPLODE -> UNNEST in PostgreSQL
194            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
195                crate::expressions::UnnestFunc {
196                    this: f.this,
197                    expressions: Vec::new(),
198                    with_ordinality: false,
199                    alias: None,
200                    offset_alias: None,
201                },
202            ))),
203
204            // ExplodeOuter -> UNNEST in PostgreSQL
205            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
206                crate::expressions::UnnestFunc {
207                    this: f.this,
208                    expressions: Vec::new(),
209                    with_ordinality: false,
210                    alias: None,
211                    offset_alias: None,
212                },
213            ))),
214
215            // ArrayConcat -> ARRAY_CAT in PostgreSQL
216            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
217                "ARRAY_CAT".to_string(),
218                f.expressions,
219            )))),
220
221            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
222            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
223                "ARRAY_PREPEND".to_string(),
224                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
225            )))),
226
227            // ============================================
228            // BITWISE OPERATIONS
229            // ============================================
230            // BitwiseXor -> # operator in PostgreSQL
231            Expression::BitwiseXor(f) => {
232                // Use a special marker that generator will recognize
233                Ok(Expression::Function(Box::new(Function::new(
234                    "__PG_BITWISE_XOR__".to_string(),
235                    vec![f.left, f.right],
236                ))))
237            }
238
239            // BitwiseAndAgg -> BIT_AND
240            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
241                "BIT_AND".to_string(),
242                vec![f.this],
243            )))),
244
245            // BitwiseOrAgg -> BIT_OR
246            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
247                "BIT_OR".to_string(),
248                vec![f.this],
249            )))),
250
251            // BitwiseXorAgg -> BIT_XOR
252            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
253                "BIT_XOR".to_string(),
254                vec![f.this],
255            )))),
256
257            // ============================================
258            // BOOLEAN AGGREGATES
259            // ============================================
260            // LogicalAnd -> BOOL_AND
261            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
262                "BOOL_AND".to_string(),
263                vec![f.this],
264            )))),
265
266            // LogicalOr -> BOOL_OR
267            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
268                "BOOL_OR".to_string(),
269                vec![f.this],
270            )))),
271
272            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
273            Expression::Xor(f) => {
274                if let (Some(a), Some(b)) = (f.this, f.expression) {
275                    Ok(Expression::Neq(Box::new(BinaryOp {
276                        left: *a,
277                        right: *b,
278                        left_comments: Vec::new(),
279                        operator_comments: Vec::new(),
280                        trailing_comments: Vec::new(),
281                        inferred_type: None,
282                    })))
283                } else {
284                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
285                }
286            }
287
288            // ============================================
289            // ARRAY OPERATORS
290            // ============================================
291            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
292            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
293            Expression::ArrayContainedBy(op) => {
294                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
295                    left: op.right,
296                    right: op.left,
297                    left_comments: Vec::new(),
298                    operator_comments: Vec::new(),
299                    trailing_comments: Vec::new(),
300                    inferred_type: None,
301                })))
302            }
303
304            // ============================================
305            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
306            // ============================================
307            // RegexpLike -> keep as-is, generator handles ~ operator output
308            Expression::RegexpLike(f) => {
309                // Generator will output as: expr ~ pattern
310                Ok(Expression::RegexpLike(f))
311            }
312
313            // ============================================
314            // DATE/TIME FUNCTIONS
315            // ============================================
316            // DateAdd -> date + INTERVAL in PostgreSQL
317            Expression::DateAdd(f) => {
318                let interval_expr = Expression::Interval(Box::new(Interval {
319                    this: Some(f.interval),
320                    unit: Some(IntervalUnitSpec::Simple {
321                        unit: f.unit,
322                        use_plural: false,
323                    }),
324                }));
325                Ok(Expression::Add(Box::new(BinaryOp {
326                    left: f.this,
327                    right: interval_expr,
328                    left_comments: Vec::new(),
329                    operator_comments: Vec::new(),
330                    trailing_comments: Vec::new(),
331                    inferred_type: None,
332                })))
333            }
334
335            // DateSub -> date - INTERVAL in PostgreSQL
336            Expression::DateSub(f) => {
337                let interval_expr = Expression::Interval(Box::new(Interval {
338                    this: Some(f.interval),
339                    unit: Some(IntervalUnitSpec::Simple {
340                        unit: f.unit,
341                        use_plural: false,
342                    }),
343                }));
344                Ok(Expression::Sub(Box::new(BinaryOp {
345                    left: f.this,
346                    right: interval_expr,
347                    left_comments: Vec::new(),
348                    operator_comments: Vec::new(),
349                    trailing_comments: Vec::new(),
350                    inferred_type: None,
351                })))
352            }
353
354            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
355            Expression::DateDiff(f) => {
356                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
357                // matching the 3-arg string-based DATEDIFF handler below
358                let unit = f.unit.unwrap_or(IntervalUnit::Day);
359
360                // Helper: CAST(expr AS TIMESTAMP)
361                let cast_ts = |e: Expression| -> Expression {
362                    Expression::Cast(Box::new(Cast {
363                        this: e,
364                        to: DataType::Timestamp {
365                            precision: None,
366                            timezone: false,
367                        },
368                        trailing_comments: Vec::new(),
369                        double_colon_syntax: false,
370                        format: None,
371                        default: None,
372                        inferred_type: None,
373                    }))
374                };
375
376                // Helper: CAST(expr AS BIGINT)
377                let cast_bigint = |e: Expression| -> Expression {
378                    Expression::Cast(Box::new(Cast {
379                        this: e,
380                        to: DataType::BigInt { length: None },
381                        trailing_comments: Vec::new(),
382                        double_colon_syntax: false,
383                        format: None,
384                        default: None,
385                        inferred_type: None,
386                    }))
387                };
388
389                // Clone end/start for reuse
390                let end_expr = f.this;
391                let start = f.expression;
392
393                // Helper: end_ts - start_ts
394                let ts_diff = || -> Expression {
395                    Expression::Sub(Box::new(BinaryOp::new(
396                        cast_ts(end_expr.clone()),
397                        cast_ts(start.clone()),
398                    )))
399                };
400
401                // Helper: AGE(end_ts, start_ts)
402                let age_call = || -> Expression {
403                    Expression::Function(Box::new(Function::new(
404                        "AGE".to_string(),
405                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
406                    )))
407                };
408
409                // Helper: EXTRACT(field FROM expr)
410                let extract = |field: DateTimeField, from: Expression| -> Expression {
411                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
412                };
413
414                // Helper: number literal
415                let num =
416                    |n: i64| -> Expression { Expression::Literal(Box::new(Literal::Number(n.to_string()))) };
417
418                let epoch_field = DateTimeField::Custom("epoch".to_string());
419
420                let result = match unit {
421                    IntervalUnit::Nanosecond => {
422                        let epoch = extract(epoch_field.clone(), ts_diff());
423                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
424                            epoch,
425                            num(1000000000),
426                        ))))
427                    }
428                    IntervalUnit::Microsecond => {
429                        let epoch = extract(epoch_field, ts_diff());
430                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
431                            epoch,
432                            num(1000000),
433                        ))))
434                    }
435                    IntervalUnit::Millisecond => {
436                        let epoch = extract(epoch_field, ts_diff());
437                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
438                    }
439                    IntervalUnit::Second => {
440                        let epoch = extract(epoch_field, ts_diff());
441                        cast_bigint(epoch)
442                    }
443                    IntervalUnit::Minute => {
444                        let epoch = extract(epoch_field, ts_diff());
445                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
446                    }
447                    IntervalUnit::Hour => {
448                        let epoch = extract(epoch_field, ts_diff());
449                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
450                    }
451                    IntervalUnit::Day => {
452                        let epoch = extract(epoch_field, ts_diff());
453                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
454                    }
455                    IntervalUnit::Week => {
456                        let diff_parens = Expression::Paren(Box::new(Paren {
457                            this: ts_diff(),
458                            trailing_comments: Vec::new(),
459                        }));
460                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
461                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
462                    }
463                    IntervalUnit::Month => {
464                        let year_part =
465                            extract(DateTimeField::Custom("year".to_string()), age_call());
466                        let month_part =
467                            extract(DateTimeField::Custom("month".to_string()), age_call());
468                        let year_months =
469                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
470                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
471                            year_months,
472                            month_part,
473                        ))))
474                    }
475                    IntervalUnit::Quarter => {
476                        let year_part =
477                            extract(DateTimeField::Custom("year".to_string()), age_call());
478                        let month_part =
479                            extract(DateTimeField::Custom("month".to_string()), age_call());
480                        let year_quarters =
481                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
482                        let month_quarters =
483                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
484                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
485                            year_quarters,
486                            month_quarters,
487                        ))))
488                    }
489                    IntervalUnit::Year => cast_bigint(extract(
490                        DateTimeField::Custom("year".to_string()),
491                        age_call(),
492                    )),
493                };
494                Ok(result)
495            }
496
497            // UnixToTime -> TO_TIMESTAMP
498            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
499                "TO_TIMESTAMP".to_string(),
500                vec![*f.this],
501            )))),
502
503            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
504            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
505                "DATE_PART".to_string(),
506                vec![Expression::string("epoch"), f.this],
507            )))),
508
509            // StrToTime -> TO_TIMESTAMP in PostgreSQL
510            Expression::ToTimestamp(f) => {
511                let mut args = vec![f.this];
512                if let Some(fmt) = f.format {
513                    args.push(fmt);
514                }
515                Ok(Expression::Function(Box::new(Function::new(
516                    "TO_TIMESTAMP".to_string(),
517                    args,
518                ))))
519            }
520
521            // StrToDate -> TO_DATE in PostgreSQL
522            Expression::ToDate(f) => {
523                let mut args = vec![f.this];
524                if let Some(fmt) = f.format {
525                    args.push(fmt);
526                }
527                Ok(Expression::Function(Box::new(Function::new(
528                    "TO_DATE".to_string(),
529                    args,
530                ))))
531            }
532
533            // TimestampTrunc -> DATE_TRUNC
534            Expression::TimestampTrunc(f) => {
535                // Convert DateTimeField to string expression for DATE_TRUNC
536                let unit_str = format!("{:?}", f.unit).to_lowercase();
537                let args = vec![Expression::string(&unit_str), f.this];
538                Ok(Expression::Function(Box::new(Function::new(
539                    "DATE_TRUNC".to_string(),
540                    args,
541                ))))
542            }
543
544            // TimeFromParts -> MAKE_TIME
545            Expression::TimeFromParts(f) => {
546                let mut args = Vec::new();
547                if let Some(h) = f.hour {
548                    args.push(*h);
549                }
550                if let Some(m) = f.min {
551                    args.push(*m);
552                }
553                if let Some(s) = f.sec {
554                    args.push(*s);
555                }
556                Ok(Expression::Function(Box::new(Function::new(
557                    "MAKE_TIME".to_string(),
558                    args,
559                ))))
560            }
561
562            // TimestampFromParts -> MAKE_TIMESTAMP
563            Expression::MakeTimestamp(f) => {
564                // MakeTimestampFunc has direct Expression fields, not Options
565                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
566                Ok(Expression::Function(Box::new(Function::new(
567                    "MAKE_TIMESTAMP".to_string(),
568                    args,
569                ))))
570            }
571
572            // ============================================
573            // STRING FUNCTIONS
574            // ============================================
575            // StringAgg is native to PostgreSQL - keep as-is
576            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
577
578            // GroupConcat -> STRING_AGG in PostgreSQL
579            Expression::GroupConcat(f) => {
580                let mut args = vec![f.this.clone()];
581                if let Some(sep) = f.separator.clone() {
582                    args.push(sep);
583                } else {
584                    args.push(Expression::string(","));
585                }
586                Ok(Expression::Function(Box::new(Function::new(
587                    "STRING_AGG".to_string(),
588                    args,
589                ))))
590            }
591
592            // StrPosition -> POSITION function
593            Expression::Position(f) => {
594                // PostgreSQL: POSITION(substring IN string)
595                // Keep as Position, generator handles it
596                Ok(Expression::Position(f))
597            }
598
599            // ============================================
600            // AGGREGATE FUNCTIONS
601            // ============================================
602            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
603            Expression::CountIf(f) => {
604                let case_expr = Expression::Case(Box::new(Case {
605                    operand: None,
606                    whens: vec![(f.this.clone(), Expression::number(1))],
607                    else_: Some(Expression::number(0)),
608                    comments: Vec::new(),
609                    inferred_type: None,
610                }));
611                Ok(Expression::Sum(Box::new(AggFunc {
612                    ignore_nulls: None,
613                    having_max: None,
614                    this: case_expr,
615                    distinct: f.distinct,
616                    filter: f.filter,
617                    order_by: Vec::new(),
618                    name: None,
619                    limit: None,
620                    inferred_type: None,
621                })))
622            }
623
624            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
625            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
626
627            // Variance -> VAR_SAMP in PostgreSQL
628            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
629                "VAR_SAMP".to_string(),
630                vec![f.this],
631            )))),
632
633            // VarPop -> VAR_POP in PostgreSQL
634            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
635                "VAR_POP".to_string(),
636                vec![f.this],
637            )))),
638
639            // ============================================
640            // JSON FUNCTIONS
641            // ============================================
642            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
643            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
644            Expression::JsonExtract(mut f) => {
645                // Only use arrow syntax for simple literal paths (string or non-negative number)
646                // Complex expressions like column references should use function form
647                f.arrow_syntax = Self::is_simple_json_path(&f.path);
648                Ok(Expression::JsonExtract(f))
649            }
650
651            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
652            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
653            // #>> (hash_arrow_syntax) stays as #>>
654            Expression::JsonExtractScalar(mut f) => {
655                if !f.hash_arrow_syntax {
656                    // Only use arrow syntax for simple literal paths (string or non-negative number)
657                    // Complex expressions like Neg(-1) should use function form
658                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
659                }
660                Ok(Expression::JsonExtractScalar(f))
661            }
662
663            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
664
665            // JSONObjectAgg -> JSON_OBJECT_AGG
666            Expression::JsonObjectAgg(f) => {
667                // JsonObjectAggFunc has key and value as Expression, not Option
668                let args = vec![f.key, f.value];
669                Ok(Expression::Function(Box::new(Function::new(
670                    "JSON_OBJECT_AGG".to_string(),
671                    args,
672                ))))
673            }
674
675            // JSONArrayAgg -> JSON_AGG
676            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
677                "JSON_AGG".to_string(),
678                vec![f.this],
679            )))),
680
681            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
682            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Box::new(Literal::String(String::new())))),
683
684            // ============================================
685            // MISC FUNCTIONS
686            // ============================================
687            // IntDiv -> DIV in PostgreSQL
688            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
689                "DIV".to_string(),
690                vec![f.this, f.expression],
691            )))),
692
693            // Unicode -> ASCII in PostgreSQL
694            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
695                "ASCII".to_string(),
696                vec![f.this],
697            )))),
698
699            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
700            Expression::LastDay(f) => {
701                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
702                let truncated = Expression::Function(Box::new(Function::new(
703                    "DATE_TRUNC".to_string(),
704                    vec![Expression::string("month"), f.this.clone()],
705                )));
706                let plus_month = Expression::Add(Box::new(BinaryOp {
707                    left: truncated,
708                    right: Expression::Interval(Box::new(Interval {
709                        this: Some(Expression::string("1")),
710                        unit: Some(IntervalUnitSpec::Simple {
711                            unit: IntervalUnit::Month,
712                            use_plural: false,
713                        }),
714                    })),
715                    left_comments: Vec::new(),
716                    operator_comments: Vec::new(),
717                    trailing_comments: Vec::new(),
718                    inferred_type: None,
719                }));
720                let minus_day = Expression::Sub(Box::new(BinaryOp {
721                    left: plus_month,
722                    right: Expression::Interval(Box::new(Interval {
723                        this: Some(Expression::string("1")),
724                        unit: Some(IntervalUnitSpec::Simple {
725                            unit: IntervalUnit::Day,
726                            use_plural: false,
727                        }),
728                    })),
729                    left_comments: Vec::new(),
730                    operator_comments: Vec::new(),
731                    trailing_comments: Vec::new(),
732                    inferred_type: None,
733                }));
734                Ok(Expression::Cast(Box::new(Cast {
735                    this: minus_day,
736                    to: DataType::Date,
737                    trailing_comments: Vec::new(),
738                    double_colon_syntax: true, // Use PostgreSQL :: syntax
739                    format: None,
740                    default: None,
741                    inferred_type: None,
742                })))
743            }
744
745            // GenerateSeries is native to PostgreSQL
746            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
747
748            // ExplodingGenerateSeries -> GENERATE_SERIES
749            Expression::ExplodingGenerateSeries(f) => {
750                let mut args = vec![f.start, f.stop];
751                if let Some(step) = f.step {
752                    args.push(step); // step is Expression, not Box<Expression>
753                }
754                Ok(Expression::Function(Box::new(Function::new(
755                    "GENERATE_SERIES".to_string(),
756                    args,
757                ))))
758            }
759
760            // ============================================
761            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
762            // ============================================
763            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
764            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
765                name: "CURRENT_TIMESTAMP".to_string(),
766                args: vec![],
767                distinct: false,
768                trailing_comments: vec![],
769                use_bracket_syntax: false,
770                no_parens: true,
771                quoted: false,
772                span: None,
773                inferred_type: None,
774            }))),
775
776            // CurrentUser -> CURRENT_USER (no parens)
777            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
778                "CURRENT_USER".to_string(),
779                vec![],
780            )))),
781
782            // CurrentDate -> CURRENT_DATE (no parens)
783            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
784                name: "CURRENT_DATE".to_string(),
785                args: vec![],
786                distinct: false,
787                trailing_comments: vec![],
788                use_bracket_syntax: false,
789                no_parens: true,
790                quoted: false,
791                span: None,
792                inferred_type: None,
793            }))),
794
795            // ============================================
796            // JOIN TRANSFORMATIONS
797            // ============================================
798            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
799            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
800                Ok(Expression::Join(Box::new(Join {
801                    this: join.this,
802                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
803                    using: join.using,
804                    kind: JoinKind::CrossApply,
805                    use_inner_keyword: false,
806                    use_outer_keyword: false,
807                    deferred_condition: false,
808                    join_hint: None,
809                    match_condition: None,
810                    pivots: join.pivots,
811                    comments: join.comments,
812                    nesting_group: 0,
813                    directed: false,
814                })))
815            }
816
817            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
818            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
819                Ok(Expression::Join(Box::new(Join {
820                    this: join.this,
821                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
822                    using: join.using,
823                    kind: JoinKind::OuterApply,
824                    use_inner_keyword: false,
825                    use_outer_keyword: false,
826                    deferred_condition: false,
827                    join_hint: None,
828                    match_condition: None,
829                    pivots: join.pivots,
830                    comments: join.comments,
831                    nesting_group: 0,
832                    directed: false,
833                })))
834            }
835
836            // ============================================
837            // GENERIC FUNCTION TRANSFORMATIONS
838            // ============================================
839            Expression::Function(f) => self.transform_function(*f),
840
841            // Generic aggregate function transformations
842            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
843
844            // ===== Context-aware JSON arrow wrapping =====
845            // When JSON arrow expressions appear in Binary/In/Not contexts,
846            // they need to be wrapped in parentheses for correct precedence.
847            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
848
849            // Binary operators that need JSON wrapping
850            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
851                left: wrap_if_json_arrow(op.left),
852                right: wrap_if_json_arrow(op.right),
853                ..*op
854            }))),
855            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
856                left: wrap_if_json_arrow(op.left),
857                right: wrap_if_json_arrow(op.right),
858                ..*op
859            }))),
860            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
861                left: wrap_if_json_arrow(op.left),
862                right: wrap_if_json_arrow(op.right),
863                ..*op
864            }))),
865            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
866                left: wrap_if_json_arrow(op.left),
867                right: wrap_if_json_arrow(op.right),
868                ..*op
869            }))),
870            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
871                left: wrap_if_json_arrow(op.left),
872                right: wrap_if_json_arrow(op.right),
873                ..*op
874            }))),
875            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
876                left: wrap_if_json_arrow(op.left),
877                right: wrap_if_json_arrow(op.right),
878                ..*op
879            }))),
880
881            // In expression - wrap the this part if it's JSON arrow
882            Expression::In(mut i) => {
883                i.this = wrap_if_json_arrow(i.this);
884                Ok(Expression::In(i))
885            }
886
887            // Not expression - wrap the this part if it's JSON arrow
888            Expression::Not(mut n) => {
889                n.this = wrap_if_json_arrow(n.this);
890                Ok(Expression::Not(n))
891            }
892
893            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
894            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
895            Expression::Merge(m) => Ok(Expression::Merge(m)),
896
897            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
898            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
899                // Convert path from bracketed format to simple key
900                // e.g., '["fr''uit"]' -> 'fr''uit'
901                let path = match *je.expression {
902                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
903                        let Literal::String(s) = lit.as_ref() else { unreachable!() };
904                        // Strip bracketed JSON path format: ["key"] -> key
905                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
906                            s[2..s.len() - 2].to_string()
907                        } else {
908                            s.clone()
909                        };
910                        Expression::Literal(Box::new(Literal::String(cleaned)))
911                    }
912                    other => other,
913                };
914                Ok(Expression::Function(Box::new(Function::new(
915                    "JSON_EXTRACT_PATH".to_string(),
916                    vec![*je.this, path],
917                ))))
918            }
919
920            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
921            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
922                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
923                    this: t.this,
924                    characters: t.characters,
925                    position: t.position,
926                    sql_standard_syntax: true,
927                    position_explicit: t.position_explicit,
928                })))
929            }
930
931            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
932            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::ByteString(_)) => { let Literal::ByteString(s) = lit.as_ref() else { unreachable!() }; Ok(Expression::Cast(Box::new(Cast {
933                this: Expression::Literal(Box::new(Literal::EscapeString(s.clone()))),
934                to: DataType::VarBinary { length: None },
935                trailing_comments: Vec::new(),
936                double_colon_syntax: false,
937                format: None,
938                default: None,
939                inferred_type: None,
940            }))) },
941
942            // Pass through everything else
943            _ => Ok(expr),
944        }
945    }
946}
947
948impl PostgresDialect {
949    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
950    /// Simple paths can use arrow syntax (->>) in PostgreSQL
951    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
952    fn is_simple_json_path(path: &Expression) -> bool {
953        match path {
954            // String literals are always simple
955            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => true,
956            // Non-negative integer literals are simple
957            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
958                let Literal::Number(n) = lit.as_ref() else { unreachable!() };
959                // Check if it's non-negative
960                !n.starts_with('-')
961            }
962            // JSONPath expressions are simple (they're already parsed paths)
963            Expression::JSONPath(_) => true,
964            // Everything else (Neg, function calls, etc.) is complex
965            _ => false,
966        }
967    }
968
969    /// Transform data types according to PostgreSQL TYPE_MAPPING
970    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
971        let transformed = match dt {
972            // TINYINT -> SMALLINT
973            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
974
975            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
976            DataType::Float { .. } => DataType::Custom {
977                name: "DOUBLE PRECISION".to_string(),
978            },
979
980            // DOUBLE -> DOUBLE PRECISION
981            DataType::Double { .. } => DataType::Custom {
982                name: "DOUBLE PRECISION".to_string(),
983            },
984
985            // BINARY -> BYTEA (handled by generator which preserves length)
986            DataType::Binary { .. } => dt,
987
988            // VARBINARY -> BYTEA (handled by generator which preserves length)
989            DataType::VarBinary { .. } => dt,
990
991            // BLOB -> BYTEA
992            DataType::Blob => DataType::Custom {
993                name: "BYTEA".to_string(),
994            },
995
996            // Custom type normalizations
997            DataType::Custom { ref name } => {
998                let upper = name.to_uppercase();
999                match upper.as_str() {
1000                    // INT8 -> BIGINT (PostgreSQL alias)
1001                    "INT8" => DataType::BigInt { length: None },
1002                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
1003                    "FLOAT8" => DataType::Custom {
1004                        name: "DOUBLE PRECISION".to_string(),
1005                    },
1006                    // FLOAT4 -> REAL (PostgreSQL alias)
1007                    "FLOAT4" => DataType::Custom {
1008                        name: "REAL".to_string(),
1009                    },
1010                    // INT4 -> INTEGER (PostgreSQL alias)
1011                    "INT4" => DataType::Int {
1012                        length: None,
1013                        integer_spelling: false,
1014                    },
1015                    // INT2 -> SMALLINT (PostgreSQL alias)
1016                    "INT2" => DataType::SmallInt { length: None },
1017                    _ => dt,
1018                }
1019            }
1020
1021            // Keep all other types as-is
1022            other => other,
1023        };
1024        Ok(Expression::DataType(transformed))
1025    }
1026
1027    fn transform_function(&self, f: Function) -> Result<Expression> {
1028        let name_upper = f.name.to_uppercase();
1029        match name_upper.as_str() {
1030            // IFNULL -> COALESCE
1031            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1032                original_name: None,
1033                expressions: f.args,
1034                inferred_type: None,
1035            }))),
1036
1037            // NVL -> COALESCE
1038            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1039                original_name: None,
1040                expressions: f.args,
1041                inferred_type: None,
1042            }))),
1043
1044            // ISNULL (SQL Server) -> COALESCE
1045            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1046                original_name: None,
1047                expressions: f.args,
1048                inferred_type: None,
1049            }))),
1050
1051            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1052            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1053                Function::new("STRING_AGG".to_string(), f.args),
1054            ))),
1055
1056            // SUBSTR -> SUBSTRING (standard SQL)
1057            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1058                "SUBSTRING".to_string(),
1059                f.args,
1060            )))),
1061
1062            // RAND -> RANDOM in PostgreSQL
1063            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1064
1065            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1066            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1067                this: f.args.into_iter().next().unwrap(),
1068                decimals: None,
1069                to: None,
1070            }))),
1071
1072            // LEN -> LENGTH in PostgreSQL
1073            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1074                this: f.args.into_iter().next().unwrap(),
1075                original_name: None,
1076                inferred_type: None,
1077            }))),
1078
1079            // CHAR_LENGTH -> LENGTH in PostgreSQL
1080            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1081                this: f.args.into_iter().next().unwrap(),
1082                original_name: None,
1083                inferred_type: None,
1084            }))),
1085
1086            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1087            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1088                Ok(Expression::Length(Box::new(UnaryFunc {
1089                    this: f.args.into_iter().next().unwrap(),
1090                    original_name: None,
1091                    inferred_type: None,
1092                })))
1093            }
1094
1095            // CHARINDEX -> POSITION in PostgreSQL
1096            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1097            "CHARINDEX" if f.args.len() >= 2 => {
1098                let mut args = f.args;
1099                let substring = args.remove(0);
1100                let string = args.remove(0);
1101                Ok(Expression::Position(Box::new(
1102                    crate::expressions::PositionFunc {
1103                        substring,
1104                        string,
1105                        start: args.pop(),
1106                    },
1107                )))
1108            }
1109
1110            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1111            "GETDATE" => Ok(Expression::CurrentTimestamp(
1112                crate::expressions::CurrentTimestamp {
1113                    precision: None,
1114                    sysdate: false,
1115                },
1116            )),
1117
1118            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1119            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1120                crate::expressions::CurrentTimestamp {
1121                    precision: None,
1122                    sysdate: false,
1123                },
1124            )),
1125
1126            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1127            "NOW" => Ok(Expression::CurrentTimestamp(
1128                crate::expressions::CurrentTimestamp {
1129                    precision: None,
1130                    sysdate: false,
1131                },
1132            )),
1133
1134            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1135            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1136                "GEN_RANDOM_UUID".to_string(),
1137                vec![],
1138            )))),
1139
1140            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1141            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1142                "GEN_RANDOM_UUID".to_string(),
1143                vec![],
1144            )))),
1145
1146            // UNNEST is native to PostgreSQL
1147            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1148
1149            // GENERATE_SERIES is native to PostgreSQL
1150            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1151
1152            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1153            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1154                "SHA256".to_string(),
1155                f.args,
1156            )))),
1157
1158            // SHA2 -> SHA256/SHA512 based on length argument
1159            "SHA2" if f.args.len() == 2 => {
1160                // SHA2(data, length) -> SHA256/SHA384/SHA512
1161                let args = f.args;
1162                let data = args[0].clone();
1163                // Default to SHA256 - would need runtime inspection for exact mapping
1164                Ok(Expression::Function(Box::new(Function::new(
1165                    "SHA256".to_string(),
1166                    vec![data],
1167                ))))
1168            }
1169
1170            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1171            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1172
1173            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1174            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1175                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1176            ))),
1177            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1178                Function::new("LEVENSHTEIN".to_string(), f.args),
1179            ))),
1180
1181            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1182            "TRIM" if f.args.len() == 2 => {
1183                let value = f.args[0].clone();
1184                let chars = f.args[1].clone();
1185                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1186                    this: value,
1187                    characters: Some(chars),
1188                    position: crate::expressions::TrimPosition::Both,
1189                    sql_standard_syntax: true,
1190                    position_explicit: false,
1191                })))
1192            }
1193
1194            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1195            "DATEDIFF" if f.args.len() >= 2 => {
1196                let mut args = f.args;
1197                if args.len() == 2 {
1198                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1199                    let first = args.remove(0);
1200                    let second = args.remove(0);
1201                    Ok(Expression::Function(Box::new(Function::new(
1202                        "AGE".to_string(),
1203                        vec![first, second],
1204                    ))))
1205                } else {
1206                    // 3-arg form: DATEDIFF(unit, start, end)
1207                    let unit_expr = args.remove(0);
1208                    let start = args.remove(0);
1209                    let end_expr = args.remove(0);
1210
1211                    // Extract unit name from identifier or column
1212                    let unit_name = match &unit_expr {
1213                        Expression::Identifier(id) => id.name.to_uppercase(),
1214                        Expression::Column(col) if col.table.is_none() => {
1215                            col.name.name.to_uppercase()
1216                        }
1217                        _ => "DAY".to_string(),
1218                    };
1219
1220                    // Helper: CAST(expr AS TIMESTAMP)
1221                    let cast_ts = |e: Expression| -> Expression {
1222                        Expression::Cast(Box::new(Cast {
1223                            this: e,
1224                            to: DataType::Timestamp {
1225                                precision: None,
1226                                timezone: false,
1227                            },
1228                            trailing_comments: Vec::new(),
1229                            double_colon_syntax: false,
1230                            format: None,
1231                            default: None,
1232                            inferred_type: None,
1233                        }))
1234                    };
1235
1236                    // Helper: CAST(expr AS BIGINT)
1237                    let cast_bigint = |e: Expression| -> Expression {
1238                        Expression::Cast(Box::new(Cast {
1239                            this: e,
1240                            to: DataType::BigInt { length: None },
1241                            trailing_comments: Vec::new(),
1242                            double_colon_syntax: false,
1243                            format: None,
1244                            default: None,
1245                            inferred_type: None,
1246                        }))
1247                    };
1248
1249                    let end_ts = cast_ts(end_expr.clone());
1250                    let start_ts = cast_ts(start.clone());
1251
1252                    // Helper: end_ts - start_ts
1253                    let ts_diff = || -> Expression {
1254                        Expression::Sub(Box::new(BinaryOp::new(
1255                            cast_ts(end_expr.clone()),
1256                            cast_ts(start.clone()),
1257                        )))
1258                    };
1259
1260                    // Helper: AGE(end_ts, start_ts)
1261                    let age_call = || -> Expression {
1262                        Expression::Function(Box::new(Function::new(
1263                            "AGE".to_string(),
1264                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1265                        )))
1266                    };
1267
1268                    // Helper: EXTRACT(field FROM expr)
1269                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1270                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1271                    };
1272
1273                    // Helper: number literal
1274                    let num = |n: i64| -> Expression {
1275                        Expression::Literal(Box::new(Literal::Number(n.to_string())))
1276                    };
1277
1278                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1279                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1280
1281                    let result = match unit_name.as_str() {
1282                        "MICROSECOND" => {
1283                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1284                            let epoch = extract(epoch_field, ts_diff());
1285                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1286                                epoch,
1287                                num(1000000),
1288                            ))))
1289                        }
1290                        "MILLISECOND" => {
1291                            let epoch = extract(epoch_field, ts_diff());
1292                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1293                        }
1294                        "SECOND" => {
1295                            let epoch = extract(epoch_field, ts_diff());
1296                            cast_bigint(epoch)
1297                        }
1298                        "MINUTE" => {
1299                            let epoch = extract(epoch_field, ts_diff());
1300                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1301                        }
1302                        "HOUR" => {
1303                            let epoch = extract(epoch_field, ts_diff());
1304                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1305                        }
1306                        "DAY" => {
1307                            let epoch = extract(epoch_field, ts_diff());
1308                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1309                        }
1310                        "WEEK" => {
1311                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1312                            let diff_parens = Expression::Paren(Box::new(Paren {
1313                                this: ts_diff(),
1314                                trailing_comments: Vec::new(),
1315                            }));
1316                            let days =
1317                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1318                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1319                        }
1320                        "MONTH" => {
1321                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1322                            let year_part =
1323                                extract(DateTimeField::Custom("year".to_string()), age_call());
1324                            let month_part =
1325                                extract(DateTimeField::Custom("month".to_string()), age_call());
1326                            let year_months =
1327                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1328                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1329                                year_months,
1330                                month_part,
1331                            ))))
1332                        }
1333                        "QUARTER" => {
1334                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1335                            let year_part =
1336                                extract(DateTimeField::Custom("year".to_string()), age_call());
1337                            let month_part =
1338                                extract(DateTimeField::Custom("month".to_string()), age_call());
1339                            let year_quarters =
1340                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1341                            let month_quarters =
1342                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1343                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1344                                year_quarters,
1345                                month_quarters,
1346                            ))))
1347                        }
1348                        "YEAR" => {
1349                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1350                            cast_bigint(extract(
1351                                DateTimeField::Custom("year".to_string()),
1352                                age_call(),
1353                            ))
1354                        }
1355                        _ => {
1356                            // Fallback: simple AGE
1357                            Expression::Function(Box::new(Function::new(
1358                                "AGE".to_string(),
1359                                vec![end_ts, start_ts],
1360                            )))
1361                        }
1362                    };
1363                    Ok(result)
1364                }
1365            }
1366
1367            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1368            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1369                let mut args = f.args;
1370                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1371                let start = args.remove(0);
1372                let end = args.remove(0);
1373                Ok(Expression::Function(Box::new(Function::new(
1374                    "AGE".to_string(),
1375                    vec![end, start],
1376                ))))
1377            }
1378
1379            // FROM_UNIXTIME -> TO_TIMESTAMP
1380            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1381                "TO_TIMESTAMP".to_string(),
1382                f.args,
1383            )))),
1384
1385            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1386            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1387                let arg = f.args.into_iter().next().unwrap();
1388                Ok(Expression::Function(Box::new(Function::new(
1389                    "DATE_PART".to_string(),
1390                    vec![Expression::string("epoch"), arg],
1391                ))))
1392            }
1393
1394            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1395            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1396                Ok(Expression::Function(Box::new(Function::new(
1397                    "DATE_PART".to_string(),
1398                    vec![
1399                        Expression::string("epoch"),
1400                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1401                            precision: None,
1402                            sysdate: false,
1403                        }),
1404                    ],
1405                ))))
1406            }
1407
1408            // DATEADD -> date + interval pattern
1409            "DATEADD" if f.args.len() == 3 => {
1410                // DATEADD(unit, count, date) -> date + interval 'count unit'
1411                // This is a simplified version - full impl would construct proper interval
1412                let mut args = f.args;
1413                let _unit = args.remove(0);
1414                let count = args.remove(0);
1415                let date = args.remove(0);
1416                Ok(Expression::Add(Box::new(BinaryOp {
1417                    left: date,
1418                    right: count,
1419                    left_comments: Vec::new(),
1420                    operator_comments: Vec::new(),
1421                    trailing_comments: Vec::new(),
1422                    inferred_type: None,
1423                })))
1424            }
1425
1426            // INSTR -> POSITION (simplified)
1427            "INSTR" if f.args.len() >= 2 => {
1428                let mut args = f.args;
1429                let string = args.remove(0);
1430                let substring = args.remove(0);
1431                Ok(Expression::Position(Box::new(
1432                    crate::expressions::PositionFunc {
1433                        substring,
1434                        string,
1435                        start: args.pop(),
1436                    },
1437                )))
1438            }
1439
1440            // CONCAT_WS is native to PostgreSQL
1441            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1442
1443            // REGEXP_REPLACE: pass through without adding 'g' flag
1444            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1445            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1446                Ok(Expression::Function(Box::new(f)))
1447            }
1448            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1449            // If occurrence is 0 (global), append 'g' to flags
1450            "REGEXP_REPLACE" if f.args.len() == 6 => {
1451                let is_global = match &f.args[4] {
1452                    Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) => { let crate::expressions::Literal::Number(n) = lit.as_ref() else { unreachable!() }; n == "0" },
1453                    _ => false,
1454                };
1455                if is_global {
1456                    let subject = f.args[0].clone();
1457                    let pattern = f.args[1].clone();
1458                    let replacement = f.args[2].clone();
1459                    let position = f.args[3].clone();
1460                    let occurrence = f.args[4].clone();
1461                    let params = &f.args[5];
1462                    let mut flags =
1463                        if let Expression::Literal(lit) = params
1464                        {
1465                            if let crate::expressions::Literal::String(s) = lit.as_ref() {
1466                            s.clone()
1467                        } else { String::new() }
1468                        } else {
1469                            String::new()
1470                        };
1471                    if !flags.contains('g') {
1472                        flags.push('g');
1473                    }
1474                    Ok(Expression::Function(Box::new(Function::new(
1475                        "REGEXP_REPLACE".to_string(),
1476                        vec![
1477                            subject,
1478                            pattern,
1479                            replacement,
1480                            position,
1481                            occurrence,
1482                            Expression::Literal(Box::new(crate::expressions::Literal::String(flags))),
1483                        ],
1484                    ))))
1485                } else {
1486                    Ok(Expression::Function(Box::new(f)))
1487                }
1488            }
1489            // Default: pass through
1490            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1491
1492            // Pass through everything else
1493            _ => Ok(Expression::Function(Box::new(f))),
1494        }
1495    }
1496
1497    fn transform_aggregate_function(
1498        &self,
1499        f: Box<crate::expressions::AggregateFunction>,
1500    ) -> Result<Expression> {
1501        let name_upper = f.name.to_uppercase();
1502        match name_upper.as_str() {
1503            // COUNT_IF -> SUM(CASE WHEN...)
1504            "COUNT_IF" if !f.args.is_empty() => {
1505                let condition = f.args.into_iter().next().unwrap();
1506                let case_expr = Expression::Case(Box::new(Case {
1507                    operand: None,
1508                    whens: vec![(condition, Expression::number(1))],
1509                    else_: Some(Expression::number(0)),
1510                    comments: Vec::new(),
1511                    inferred_type: None,
1512                }));
1513                Ok(Expression::Sum(Box::new(AggFunc {
1514                    ignore_nulls: None,
1515                    having_max: None,
1516                    this: case_expr,
1517                    distinct: f.distinct,
1518                    filter: f.filter,
1519                    order_by: Vec::new(),
1520                    name: None,
1521                    limit: None,
1522                    inferred_type: None,
1523                })))
1524            }
1525
1526            // GROUP_CONCAT -> STRING_AGG
1527            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1528                Function::new("STRING_AGG".to_string(), f.args),
1529            ))),
1530
1531            // STDEV -> STDDEV in PostgreSQL
1532            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1533                ignore_nulls: None,
1534                having_max: None,
1535                this: f.args.into_iter().next().unwrap(),
1536                distinct: f.distinct,
1537                filter: f.filter,
1538                order_by: Vec::new(),
1539                name: None,
1540                limit: None,
1541                inferred_type: None,
1542            }))),
1543
1544            // STDEVP -> STDDEV_POP in PostgreSQL
1545            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1546                ignore_nulls: None,
1547                having_max: None,
1548                this: f.args.into_iter().next().unwrap(),
1549                distinct: f.distinct,
1550                filter: f.filter,
1551                order_by: Vec::new(),
1552                name: None,
1553                limit: None,
1554                inferred_type: None,
1555            }))),
1556
1557            // VAR -> VAR_SAMP in PostgreSQL
1558            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1559                ignore_nulls: None,
1560                having_max: None,
1561                this: f.args.into_iter().next().unwrap(),
1562                distinct: f.distinct,
1563                filter: f.filter,
1564                order_by: Vec::new(),
1565                name: None,
1566                limit: None,
1567                inferred_type: None,
1568            }))),
1569
1570            // VARP -> VAR_POP in PostgreSQL
1571            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1572                ignore_nulls: None,
1573                having_max: None,
1574                this: f.args.into_iter().next().unwrap(),
1575                distinct: f.distinct,
1576                filter: f.filter,
1577                order_by: Vec::new(),
1578                name: None,
1579                limit: None,
1580                inferred_type: None,
1581            }))),
1582
1583            // BIT_AND is native to PostgreSQL
1584            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1585
1586            // BIT_OR is native to PostgreSQL
1587            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1588
1589            // BIT_XOR is native to PostgreSQL
1590            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1591
1592            // BOOL_AND is native to PostgreSQL
1593            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1594
1595            // BOOL_OR is native to PostgreSQL
1596            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1597
1598            // VARIANCE -> VAR_SAMP in PostgreSQL
1599            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1600                ignore_nulls: None,
1601                having_max: None,
1602                this: f.args.into_iter().next().unwrap(),
1603                distinct: f.distinct,
1604                filter: f.filter,
1605                order_by: Vec::new(),
1606                name: None,
1607                limit: None,
1608                inferred_type: None,
1609            }))),
1610
1611            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1612            "LOGICAL_OR" if !f.args.is_empty() => {
1613                let mut new_agg = f.clone();
1614                new_agg.name = "BOOL_OR".to_string();
1615                Ok(Expression::AggregateFunction(new_agg))
1616            }
1617
1618            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1619            "LOGICAL_AND" if !f.args.is_empty() => {
1620                let mut new_agg = f.clone();
1621                new_agg.name = "BOOL_AND".to_string();
1622                Ok(Expression::AggregateFunction(new_agg))
1623            }
1624
1625            // Pass through everything else
1626            _ => Ok(Expression::AggregateFunction(f)),
1627        }
1628    }
1629}
1630
1631#[cfg(test)]
1632mod tests {
1633    use super::*;
1634    use crate::dialects::Dialect;
1635
1636    fn transpile_to_postgres(sql: &str) -> String {
1637        let dialect = Dialect::get(DialectType::Generic);
1638        let result = dialect
1639            .transpile_to(sql, DialectType::PostgreSQL)
1640            .expect("Transpile failed");
1641        result[0].clone()
1642    }
1643
1644    #[test]
1645    fn test_ifnull_to_coalesce() {
1646        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1647        assert!(
1648            result.contains("COALESCE"),
1649            "Expected COALESCE, got: {}",
1650            result
1651        );
1652    }
1653
1654    #[test]
1655    fn test_nvl_to_coalesce() {
1656        let result = transpile_to_postgres("SELECT NVL(a, b)");
1657        assert!(
1658            result.contains("COALESCE"),
1659            "Expected COALESCE, got: {}",
1660            result
1661        );
1662    }
1663
1664    #[test]
1665    fn test_rand_to_random() {
1666        let result = transpile_to_postgres("SELECT RAND()");
1667        assert!(
1668            result.contains("RANDOM"),
1669            "Expected RANDOM, got: {}",
1670            result
1671        );
1672    }
1673
1674    #[test]
1675    fn test_basic_select() {
1676        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1677        assert!(result.contains("SELECT"));
1678        assert!(result.contains("FROM users"));
1679    }
1680
1681    #[test]
1682    fn test_len_to_length() {
1683        let result = transpile_to_postgres("SELECT LEN(name)");
1684        assert!(
1685            result.contains("LENGTH"),
1686            "Expected LENGTH, got: {}",
1687            result
1688        );
1689    }
1690
1691    #[test]
1692    fn test_getdate_to_current_timestamp() {
1693        let result = transpile_to_postgres("SELECT GETDATE()");
1694        assert!(
1695            result.contains("CURRENT_TIMESTAMP"),
1696            "Expected CURRENT_TIMESTAMP, got: {}",
1697            result
1698        );
1699    }
1700
1701    #[test]
1702    fn test_substr_to_substring() {
1703        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1704        assert!(
1705            result.contains("SUBSTRING"),
1706            "Expected SUBSTRING, got: {}",
1707            result
1708        );
1709    }
1710
1711    #[test]
1712    fn test_group_concat_to_string_agg() {
1713        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1714        assert!(
1715            result.contains("STRING_AGG"),
1716            "Expected STRING_AGG, got: {}",
1717            result
1718        );
1719    }
1720
1721    #[test]
1722    fn test_double_quote_identifiers() {
1723        // PostgreSQL uses double quotes for identifiers
1724        let dialect = PostgresDialect;
1725        let config = dialect.generator_config();
1726        assert_eq!(config.identifier_quote, '"');
1727    }
1728
1729    #[test]
1730    fn test_char_length_to_length() {
1731        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1732        assert!(
1733            result.contains("LENGTH"),
1734            "Expected LENGTH, got: {}",
1735            result
1736        );
1737    }
1738
1739    #[test]
1740    fn test_character_length_to_length() {
1741        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1742        assert!(
1743            result.contains("LENGTH"),
1744            "Expected LENGTH, got: {}",
1745            result
1746        );
1747    }
1748
1749    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1750    fn identity_postgres(sql: &str) -> String {
1751        let dialect = Dialect::get(DialectType::PostgreSQL);
1752        let exprs = dialect.parse(sql).expect("Parse failed");
1753        let transformed = dialect
1754            .transform(exprs[0].clone())
1755            .expect("Transform failed");
1756        dialect.generate(&transformed).expect("Generate failed")
1757    }
1758
1759    #[test]
1760    fn test_json_extract_with_column_path() {
1761        // When the path is a column reference (not a literal), should use function form
1762        let result = identity_postgres("json_data.data -> field_ids.field_id");
1763        assert!(
1764            result.contains("JSON_EXTRACT_PATH"),
1765            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1766            result
1767        );
1768    }
1769
1770    #[test]
1771    fn test_json_extract_scalar_with_negative_index() {
1772        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1773        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1774        assert!(
1775            result.contains("JSON_EXTRACT_PATH_TEXT"),
1776            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1777            result
1778        );
1779        // The first -> should still be arrow syntax since 'duration' is a string literal
1780        assert!(
1781            result.contains("->"),
1782            "Expected -> for string literal path, got: {}",
1783            result
1784        );
1785    }
1786
1787    #[test]
1788    fn test_json_extract_with_string_literal() {
1789        // When the path is a string literal, should keep arrow syntax
1790        let result = identity_postgres("data -> 'key'");
1791        assert!(
1792            result.contains("->"),
1793            "Expected -> for string literal path, got: {}",
1794            result
1795        );
1796        assert!(
1797            !result.contains("JSON_EXTRACT_PATH"),
1798            "Should NOT use function form for string literal, got: {}",
1799            result
1800        );
1801    }
1802}