Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145            }))),
146
147            // NVL -> COALESCE in PostgreSQL
148            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
149                original_name: None,
150                expressions: vec![f.this, f.expression],
151            }))),
152
153            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
154            // so it outputs as COALESCE instead of the original function name
155            Expression::Coalesce(mut f) => {
156                f.original_name = None;
157                Ok(Expression::Coalesce(f))
158            }
159
160            // ============================================
161            // CAST OPERATIONS
162            // ============================================
163            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
164            Expression::TryCast(c) => Ok(Expression::Cast(c)),
165
166            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
167            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
168
169            // ============================================
170            // RANDOM
171            // ============================================
172            // RAND -> RANDOM in PostgreSQL
173            Expression::Rand(r) => {
174                // PostgreSQL's RANDOM() doesn't take a seed argument
175                let _ = r.seed; // Ignore seed
176                Ok(Expression::Random(crate::expressions::Random))
177            }
178
179            // ============================================
180            // UUID
181            // ============================================
182            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
183            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
184                "GEN_RANDOM_UUID".to_string(),
185                vec![],
186            )))),
187
188            // ============================================
189            // ARRAY OPERATIONS
190            // ============================================
191            // EXPLODE -> UNNEST in PostgreSQL
192            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
193                crate::expressions::UnnestFunc {
194                    this: f.this,
195                    expressions: Vec::new(),
196                    with_ordinality: false,
197                    alias: None,
198                    offset_alias: None,
199                },
200            ))),
201
202            // ExplodeOuter -> UNNEST in PostgreSQL
203            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
204                crate::expressions::UnnestFunc {
205                    this: f.this,
206                    expressions: Vec::new(),
207                    with_ordinality: false,
208                    alias: None,
209                    offset_alias: None,
210                },
211            ))),
212
213            // ArrayConcat -> ARRAY_CAT in PostgreSQL
214            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
215                "ARRAY_CAT".to_string(),
216                f.expressions,
217            )))),
218
219            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
220            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
221                "ARRAY_PREPEND".to_string(),
222                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
223            )))),
224
225            // ============================================
226            // BITWISE OPERATIONS
227            // ============================================
228            // BitwiseXor -> # operator in PostgreSQL
229            Expression::BitwiseXor(f) => {
230                // Use a special marker that generator will recognize
231                Ok(Expression::Function(Box::new(Function::new(
232                    "__PG_BITWISE_XOR__".to_string(),
233                    vec![f.left, f.right],
234                ))))
235            }
236
237            // BitwiseAndAgg -> BIT_AND
238            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
239                "BIT_AND".to_string(),
240                vec![f.this],
241            )))),
242
243            // BitwiseOrAgg -> BIT_OR
244            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
245                "BIT_OR".to_string(),
246                vec![f.this],
247            )))),
248
249            // BitwiseXorAgg -> BIT_XOR
250            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
251                "BIT_XOR".to_string(),
252                vec![f.this],
253            )))),
254
255            // ============================================
256            // BOOLEAN AGGREGATES
257            // ============================================
258            // LogicalAnd -> BOOL_AND
259            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
260                "BOOL_AND".to_string(),
261                vec![f.this],
262            )))),
263
264            // LogicalOr -> BOOL_OR
265            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
266                "BOOL_OR".to_string(),
267                vec![f.this],
268            )))),
269
270            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
271            Expression::Xor(f) => {
272                if let (Some(a), Some(b)) = (f.this, f.expression) {
273                    Ok(Expression::Neq(Box::new(BinaryOp {
274                        left: *a,
275                        right: *b,
276                        left_comments: Vec::new(),
277                        operator_comments: Vec::new(),
278                        trailing_comments: Vec::new(),
279                    })))
280                } else {
281                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
282                }
283            }
284
285            // ============================================
286            // ARRAY OPERATORS
287            // ============================================
288            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
289            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
290            Expression::ArrayContainedBy(op) => {
291                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
292                    left: op.right,
293                    right: op.left,
294                    left_comments: Vec::new(),
295                    operator_comments: Vec::new(),
296                    trailing_comments: Vec::new(),
297                })))
298            }
299
300            // ============================================
301            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
302            // ============================================
303            // RegexpLike -> keep as-is, generator handles ~ operator output
304            Expression::RegexpLike(f) => {
305                // Generator will output as: expr ~ pattern
306                Ok(Expression::RegexpLike(f))
307            }
308
309            // ============================================
310            // DATE/TIME FUNCTIONS
311            // ============================================
312            // DateAdd -> date + INTERVAL in PostgreSQL
313            Expression::DateAdd(f) => {
314                let interval_expr = Expression::Interval(Box::new(Interval {
315                    this: Some(f.interval),
316                    unit: Some(IntervalUnitSpec::Simple {
317                        unit: f.unit,
318                        use_plural: false,
319                    }),
320                }));
321                Ok(Expression::Add(Box::new(BinaryOp {
322                    left: f.this,
323                    right: interval_expr,
324                    left_comments: Vec::new(),
325                    operator_comments: Vec::new(),
326                    trailing_comments: Vec::new(),
327                })))
328            }
329
330            // DateSub -> date - INTERVAL in PostgreSQL
331            Expression::DateSub(f) => {
332                let interval_expr = Expression::Interval(Box::new(Interval {
333                    this: Some(f.interval),
334                    unit: Some(IntervalUnitSpec::Simple {
335                        unit: f.unit,
336                        use_plural: false,
337                    }),
338                }));
339                Ok(Expression::Sub(Box::new(BinaryOp {
340                    left: f.this,
341                    right: interval_expr,
342                    left_comments: Vec::new(),
343                    operator_comments: Vec::new(),
344                    trailing_comments: Vec::new(),
345                })))
346            }
347
348            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
349            Expression::DateDiff(f) => {
350                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
351                // matching the 3-arg string-based DATEDIFF handler below
352                let unit = f.unit.unwrap_or(IntervalUnit::Day);
353
354                // Helper: CAST(expr AS TIMESTAMP)
355                let cast_ts = |e: Expression| -> Expression {
356                    Expression::Cast(Box::new(Cast {
357                        this: e,
358                        to: DataType::Timestamp {
359                            precision: None,
360                            timezone: false,
361                        },
362                        trailing_comments: Vec::new(),
363                        double_colon_syntax: false,
364                        format: None,
365                        default: None,
366                    }))
367                };
368
369                // Helper: CAST(expr AS BIGINT)
370                let cast_bigint = |e: Expression| -> Expression {
371                    Expression::Cast(Box::new(Cast {
372                        this: e,
373                        to: DataType::BigInt { length: None },
374                        trailing_comments: Vec::new(),
375                        double_colon_syntax: false,
376                        format: None,
377                        default: None,
378                    }))
379                };
380
381                // Clone end/start for reuse
382                let end_expr = f.this;
383                let start = f.expression;
384
385                // Helper: end_ts - start_ts
386                let ts_diff = || -> Expression {
387                    Expression::Sub(Box::new(BinaryOp::new(
388                        cast_ts(end_expr.clone()),
389                        cast_ts(start.clone()),
390                    )))
391                };
392
393                // Helper: AGE(end_ts, start_ts)
394                let age_call = || -> Expression {
395                    Expression::Function(Box::new(Function::new(
396                        "AGE".to_string(),
397                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
398                    )))
399                };
400
401                // Helper: EXTRACT(field FROM expr)
402                let extract = |field: DateTimeField, from: Expression| -> Expression {
403                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
404                };
405
406                // Helper: number literal
407                let num =
408                    |n: i64| -> Expression { Expression::Literal(Literal::Number(n.to_string())) };
409
410                let epoch_field = DateTimeField::Custom("epoch".to_string());
411
412                let result = match unit {
413                    IntervalUnit::Nanosecond => {
414                        let epoch = extract(epoch_field.clone(), ts_diff());
415                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
416                            epoch,
417                            num(1000000000),
418                        ))))
419                    }
420                    IntervalUnit::Microsecond => {
421                        let epoch = extract(epoch_field, ts_diff());
422                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
423                            epoch,
424                            num(1000000),
425                        ))))
426                    }
427                    IntervalUnit::Millisecond => {
428                        let epoch = extract(epoch_field, ts_diff());
429                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
430                    }
431                    IntervalUnit::Second => {
432                        let epoch = extract(epoch_field, ts_diff());
433                        cast_bigint(epoch)
434                    }
435                    IntervalUnit::Minute => {
436                        let epoch = extract(epoch_field, ts_diff());
437                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
438                    }
439                    IntervalUnit::Hour => {
440                        let epoch = extract(epoch_field, ts_diff());
441                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
442                    }
443                    IntervalUnit::Day => {
444                        let epoch = extract(epoch_field, ts_diff());
445                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
446                    }
447                    IntervalUnit::Week => {
448                        let diff_parens = Expression::Paren(Box::new(Paren {
449                            this: ts_diff(),
450                            trailing_comments: Vec::new(),
451                        }));
452                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
453                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
454                    }
455                    IntervalUnit::Month => {
456                        let year_part =
457                            extract(DateTimeField::Custom("year".to_string()), age_call());
458                        let month_part =
459                            extract(DateTimeField::Custom("month".to_string()), age_call());
460                        let year_months =
461                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
462                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
463                            year_months,
464                            month_part,
465                        ))))
466                    }
467                    IntervalUnit::Quarter => {
468                        let year_part =
469                            extract(DateTimeField::Custom("year".to_string()), age_call());
470                        let month_part =
471                            extract(DateTimeField::Custom("month".to_string()), age_call());
472                        let year_quarters =
473                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
474                        let month_quarters =
475                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
476                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
477                            year_quarters,
478                            month_quarters,
479                        ))))
480                    }
481                    IntervalUnit::Year => cast_bigint(extract(
482                        DateTimeField::Custom("year".to_string()),
483                        age_call(),
484                    )),
485                };
486                Ok(result)
487            }
488
489            // UnixToTime -> TO_TIMESTAMP
490            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
491                "TO_TIMESTAMP".to_string(),
492                vec![*f.this],
493            )))),
494
495            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
496            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
497                "DATE_PART".to_string(),
498                vec![Expression::string("epoch"), f.this],
499            )))),
500
501            // StrToTime -> TO_TIMESTAMP in PostgreSQL
502            Expression::ToTimestamp(f) => {
503                let mut args = vec![f.this];
504                if let Some(fmt) = f.format {
505                    args.push(fmt);
506                }
507                Ok(Expression::Function(Box::new(Function::new(
508                    "TO_TIMESTAMP".to_string(),
509                    args,
510                ))))
511            }
512
513            // StrToDate -> TO_DATE in PostgreSQL
514            Expression::ToDate(f) => {
515                let mut args = vec![f.this];
516                if let Some(fmt) = f.format {
517                    args.push(fmt);
518                }
519                Ok(Expression::Function(Box::new(Function::new(
520                    "TO_DATE".to_string(),
521                    args,
522                ))))
523            }
524
525            // TimestampTrunc -> DATE_TRUNC
526            Expression::TimestampTrunc(f) => {
527                // Convert DateTimeField to string expression for DATE_TRUNC
528                let unit_str = format!("{:?}", f.unit).to_lowercase();
529                let args = vec![Expression::string(&unit_str), f.this];
530                Ok(Expression::Function(Box::new(Function::new(
531                    "DATE_TRUNC".to_string(),
532                    args,
533                ))))
534            }
535
536            // TimeFromParts -> MAKE_TIME
537            Expression::TimeFromParts(f) => {
538                let mut args = Vec::new();
539                if let Some(h) = f.hour {
540                    args.push(*h);
541                }
542                if let Some(m) = f.min {
543                    args.push(*m);
544                }
545                if let Some(s) = f.sec {
546                    args.push(*s);
547                }
548                Ok(Expression::Function(Box::new(Function::new(
549                    "MAKE_TIME".to_string(),
550                    args,
551                ))))
552            }
553
554            // TimestampFromParts -> MAKE_TIMESTAMP
555            Expression::MakeTimestamp(f) => {
556                // MakeTimestampFunc has direct Expression fields, not Options
557                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
558                Ok(Expression::Function(Box::new(Function::new(
559                    "MAKE_TIMESTAMP".to_string(),
560                    args,
561                ))))
562            }
563
564            // ============================================
565            // STRING FUNCTIONS
566            // ============================================
567            // StringAgg is native to PostgreSQL - keep as-is
568            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
569
570            // GroupConcat -> STRING_AGG in PostgreSQL
571            Expression::GroupConcat(f) => {
572                let mut args = vec![f.this.clone()];
573                if let Some(sep) = f.separator.clone() {
574                    args.push(sep);
575                } else {
576                    args.push(Expression::string(","));
577                }
578                Ok(Expression::Function(Box::new(Function::new(
579                    "STRING_AGG".to_string(),
580                    args,
581                ))))
582            }
583
584            // StrPosition -> POSITION function
585            Expression::Position(f) => {
586                // PostgreSQL: POSITION(substring IN string)
587                // Keep as Position, generator handles it
588                Ok(Expression::Position(f))
589            }
590
591            // ============================================
592            // AGGREGATE FUNCTIONS
593            // ============================================
594            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
595            Expression::CountIf(f) => {
596                let case_expr = Expression::Case(Box::new(Case {
597                    operand: None,
598                    whens: vec![(f.this.clone(), Expression::number(1))],
599                    else_: Some(Expression::number(0)),
600                    comments: Vec::new(),
601                }));
602                Ok(Expression::Sum(Box::new(AggFunc {
603                    ignore_nulls: None,
604                    having_max: None,
605                    this: case_expr,
606                    distinct: f.distinct,
607                    filter: f.filter,
608                    order_by: Vec::new(),
609                    name: None,
610                    limit: None,
611                })))
612            }
613
614            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
615            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
616
617            // Variance -> VAR_SAMP in PostgreSQL
618            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
619                "VAR_SAMP".to_string(),
620                vec![f.this],
621            )))),
622
623            // VarPop -> VAR_POP in PostgreSQL
624            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
625                "VAR_POP".to_string(),
626                vec![f.this],
627            )))),
628
629            // ============================================
630            // JSON FUNCTIONS
631            // ============================================
632            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
633            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
634            Expression::JsonExtract(mut f) => {
635                // Only use arrow syntax for simple literal paths (string or non-negative number)
636                // Complex expressions like column references should use function form
637                f.arrow_syntax = Self::is_simple_json_path(&f.path);
638                Ok(Expression::JsonExtract(f))
639            }
640
641            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
642            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
643            // #>> (hash_arrow_syntax) stays as #>>
644            Expression::JsonExtractScalar(mut f) => {
645                if !f.hash_arrow_syntax {
646                    // Only use arrow syntax for simple literal paths (string or non-negative number)
647                    // Complex expressions like Neg(-1) should use function form
648                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
649                }
650                Ok(Expression::JsonExtractScalar(f))
651            }
652
653            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
654
655            // JSONObjectAgg -> JSON_OBJECT_AGG
656            Expression::JsonObjectAgg(f) => {
657                // JsonObjectAggFunc has key and value as Expression, not Option
658                let args = vec![f.key, f.value];
659                Ok(Expression::Function(Box::new(Function::new(
660                    "JSON_OBJECT_AGG".to_string(),
661                    args,
662                ))))
663            }
664
665            // JSONArrayAgg -> JSON_AGG
666            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
667                "JSON_AGG".to_string(),
668                vec![f.this],
669            )))),
670
671            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
672            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Literal::String(String::new()))),
673
674            // ============================================
675            // MISC FUNCTIONS
676            // ============================================
677            // IntDiv -> DIV in PostgreSQL
678            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
679                "DIV".to_string(),
680                vec![f.this, f.expression],
681            )))),
682
683            // Unicode -> ASCII in PostgreSQL
684            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
685                "ASCII".to_string(),
686                vec![f.this],
687            )))),
688
689            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
690            Expression::LastDay(f) => {
691                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
692                let truncated = Expression::Function(Box::new(Function::new(
693                    "DATE_TRUNC".to_string(),
694                    vec![Expression::string("month"), f.this.clone()],
695                )));
696                let plus_month = Expression::Add(Box::new(BinaryOp {
697                    left: truncated,
698                    right: Expression::Interval(Box::new(Interval {
699                        this: Some(Expression::string("1")),
700                        unit: Some(IntervalUnitSpec::Simple {
701                            unit: IntervalUnit::Month,
702                            use_plural: false,
703                        }),
704                    })),
705                    left_comments: Vec::new(),
706                    operator_comments: Vec::new(),
707                    trailing_comments: Vec::new(),
708                }));
709                let minus_day = Expression::Sub(Box::new(BinaryOp {
710                    left: plus_month,
711                    right: Expression::Interval(Box::new(Interval {
712                        this: Some(Expression::string("1")),
713                        unit: Some(IntervalUnitSpec::Simple {
714                            unit: IntervalUnit::Day,
715                            use_plural: false,
716                        }),
717                    })),
718                    left_comments: Vec::new(),
719                    operator_comments: Vec::new(),
720                    trailing_comments: Vec::new(),
721                }));
722                Ok(Expression::Cast(Box::new(Cast {
723                    this: minus_day,
724                    to: DataType::Date,
725                    trailing_comments: Vec::new(),
726                    double_colon_syntax: true, // Use PostgreSQL :: syntax
727                    format: None,
728                    default: None,
729                })))
730            }
731
732            // GenerateSeries is native to PostgreSQL
733            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
734
735            // ExplodingGenerateSeries -> GENERATE_SERIES
736            Expression::ExplodingGenerateSeries(f) => {
737                let mut args = vec![f.start, f.stop];
738                if let Some(step) = f.step {
739                    args.push(step); // step is Expression, not Box<Expression>
740                }
741                Ok(Expression::Function(Box::new(Function::new(
742                    "GENERATE_SERIES".to_string(),
743                    args,
744                ))))
745            }
746
747            // ============================================
748            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
749            // ============================================
750            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
751            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
752                name: "CURRENT_TIMESTAMP".to_string(),
753                args: vec![],
754                distinct: false,
755                trailing_comments: vec![],
756                use_bracket_syntax: false,
757                no_parens: true,
758                quoted: false,
759            }))),
760
761            // CurrentUser -> CURRENT_USER (no parens)
762            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
763                "CURRENT_USER".to_string(),
764                vec![],
765            )))),
766
767            // CurrentDate -> CURRENT_DATE (no parens)
768            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
769                name: "CURRENT_DATE".to_string(),
770                args: vec![],
771                distinct: false,
772                trailing_comments: vec![],
773                use_bracket_syntax: false,
774                no_parens: true,
775                quoted: false,
776            }))),
777
778            // ============================================
779            // JOIN TRANSFORMATIONS
780            // ============================================
781            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
782            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
783                Ok(Expression::Join(Box::new(Join {
784                    this: join.this,
785                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
786                    using: join.using,
787                    kind: JoinKind::CrossApply,
788                    use_inner_keyword: false,
789                    use_outer_keyword: false,
790                    deferred_condition: false,
791                    join_hint: None,
792                    match_condition: None,
793                    pivots: join.pivots,
794                    comments: join.comments,
795                    nesting_group: 0,
796                    directed: false,
797                })))
798            }
799
800            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
801            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
802                Ok(Expression::Join(Box::new(Join {
803                    this: join.this,
804                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
805                    using: join.using,
806                    kind: JoinKind::OuterApply,
807                    use_inner_keyword: false,
808                    use_outer_keyword: false,
809                    deferred_condition: false,
810                    join_hint: None,
811                    match_condition: None,
812                    pivots: join.pivots,
813                    comments: join.comments,
814                    nesting_group: 0,
815                    directed: false,
816                })))
817            }
818
819            // ============================================
820            // GENERIC FUNCTION TRANSFORMATIONS
821            // ============================================
822            Expression::Function(f) => self.transform_function(*f),
823
824            // Generic aggregate function transformations
825            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
826
827            // ===== Context-aware JSON arrow wrapping =====
828            // When JSON arrow expressions appear in Binary/In/Not contexts,
829            // they need to be wrapped in parentheses for correct precedence.
830            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
831
832            // Binary operators that need JSON wrapping
833            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
834                left: wrap_if_json_arrow(op.left),
835                right: wrap_if_json_arrow(op.right),
836                ..*op
837            }))),
838            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
839                left: wrap_if_json_arrow(op.left),
840                right: wrap_if_json_arrow(op.right),
841                ..*op
842            }))),
843            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
844                left: wrap_if_json_arrow(op.left),
845                right: wrap_if_json_arrow(op.right),
846                ..*op
847            }))),
848            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
849                left: wrap_if_json_arrow(op.left),
850                right: wrap_if_json_arrow(op.right),
851                ..*op
852            }))),
853            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
854                left: wrap_if_json_arrow(op.left),
855                right: wrap_if_json_arrow(op.right),
856                ..*op
857            }))),
858            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
859                left: wrap_if_json_arrow(op.left),
860                right: wrap_if_json_arrow(op.right),
861                ..*op
862            }))),
863
864            // In expression - wrap the this part if it's JSON arrow
865            Expression::In(mut i) => {
866                i.this = wrap_if_json_arrow(i.this);
867                Ok(Expression::In(i))
868            }
869
870            // Not expression - wrap the this part if it's JSON arrow
871            Expression::Not(mut n) => {
872                n.this = wrap_if_json_arrow(n.this);
873                Ok(Expression::Not(n))
874            }
875
876            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
877            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
878            Expression::Merge(m) => Ok(Expression::Merge(m)),
879
880            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
881            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
882                // Convert path from bracketed format to simple key
883                // e.g., '["fr''uit"]' -> 'fr''uit'
884                let path = match *je.expression {
885                    Expression::Literal(Literal::String(s)) => {
886                        // Strip bracketed JSON path format: ["key"] -> key
887                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
888                            s[2..s.len() - 2].to_string()
889                        } else {
890                            s
891                        };
892                        Expression::Literal(Literal::String(cleaned))
893                    }
894                    other => other,
895                };
896                Ok(Expression::Function(Box::new(Function::new(
897                    "JSON_EXTRACT_PATH".to_string(),
898                    vec![*je.this, path],
899                ))))
900            }
901
902            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
903            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
904                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
905                    this: t.this,
906                    characters: t.characters,
907                    position: t.position,
908                    sql_standard_syntax: true,
909                    position_explicit: t.position_explicit,
910                })))
911            }
912
913            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
914            Expression::Literal(Literal::ByteString(s)) => Ok(Expression::Cast(Box::new(Cast {
915                this: Expression::Literal(Literal::EscapeString(s)),
916                to: DataType::VarBinary { length: None },
917                trailing_comments: Vec::new(),
918                double_colon_syntax: false,
919                format: None,
920                default: None,
921            }))),
922
923            // Pass through everything else
924            _ => Ok(expr),
925        }
926    }
927}
928
929impl PostgresDialect {
930    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
931    /// Simple paths can use arrow syntax (->>) in PostgreSQL
932    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
933    fn is_simple_json_path(path: &Expression) -> bool {
934        match path {
935            // String literals are always simple
936            Expression::Literal(Literal::String(_)) => true,
937            // Non-negative integer literals are simple
938            Expression::Literal(Literal::Number(n)) => {
939                // Check if it's non-negative
940                !n.starts_with('-')
941            }
942            // JSONPath expressions are simple (they're already parsed paths)
943            Expression::JSONPath(_) => true,
944            // Everything else (Neg, function calls, etc.) is complex
945            _ => false,
946        }
947    }
948
949    /// Transform data types according to PostgreSQL TYPE_MAPPING
950    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
951        let transformed = match dt {
952            // TINYINT -> SMALLINT
953            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
954
955            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
956            DataType::Float { .. } => DataType::Custom {
957                name: "DOUBLE PRECISION".to_string(),
958            },
959
960            // DOUBLE -> DOUBLE PRECISION
961            DataType::Double { .. } => DataType::Custom {
962                name: "DOUBLE PRECISION".to_string(),
963            },
964
965            // BINARY -> BYTEA (handled by generator which preserves length)
966            DataType::Binary { .. } => dt,
967
968            // VARBINARY -> BYTEA (handled by generator which preserves length)
969            DataType::VarBinary { .. } => dt,
970
971            // BLOB -> BYTEA
972            DataType::Blob => DataType::Custom {
973                name: "BYTEA".to_string(),
974            },
975
976            // Custom type normalizations
977            DataType::Custom { ref name } => {
978                let upper = name.to_uppercase();
979                match upper.as_str() {
980                    // INT8 -> BIGINT (PostgreSQL alias)
981                    "INT8" => DataType::BigInt { length: None },
982                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
983                    "FLOAT8" => DataType::Custom {
984                        name: "DOUBLE PRECISION".to_string(),
985                    },
986                    // FLOAT4 -> REAL (PostgreSQL alias)
987                    "FLOAT4" => DataType::Custom {
988                        name: "REAL".to_string(),
989                    },
990                    // INT4 -> INTEGER (PostgreSQL alias)
991                    "INT4" => DataType::Int {
992                        length: None,
993                        integer_spelling: false,
994                    },
995                    // INT2 -> SMALLINT (PostgreSQL alias)
996                    "INT2" => DataType::SmallInt { length: None },
997                    _ => dt,
998                }
999            }
1000
1001            // Keep all other types as-is
1002            other => other,
1003        };
1004        Ok(Expression::DataType(transformed))
1005    }
1006
1007    fn transform_function(&self, f: Function) -> Result<Expression> {
1008        let name_upper = f.name.to_uppercase();
1009        match name_upper.as_str() {
1010            // IFNULL -> COALESCE
1011            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1012                original_name: None,
1013                expressions: f.args,
1014            }))),
1015
1016            // NVL -> COALESCE
1017            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1018                original_name: None,
1019                expressions: f.args,
1020            }))),
1021
1022            // ISNULL (SQL Server) -> COALESCE
1023            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1024                original_name: None,
1025                expressions: f.args,
1026            }))),
1027
1028            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1029            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1030                Function::new("STRING_AGG".to_string(), f.args),
1031            ))),
1032
1033            // SUBSTR -> SUBSTRING (standard SQL)
1034            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1035                "SUBSTRING".to_string(),
1036                f.args,
1037            )))),
1038
1039            // RAND -> RANDOM in PostgreSQL
1040            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1041
1042            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1043            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1044                this: f.args.into_iter().next().unwrap(),
1045                decimals: None,
1046                to: None,
1047            }))),
1048
1049            // LEN -> LENGTH in PostgreSQL
1050            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1051                this: f.args.into_iter().next().unwrap(),
1052                original_name: None,
1053            }))),
1054
1055            // CHAR_LENGTH -> LENGTH in PostgreSQL
1056            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1057                this: f.args.into_iter().next().unwrap(),
1058                original_name: None,
1059            }))),
1060
1061            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1062            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1063                Ok(Expression::Length(Box::new(UnaryFunc {
1064                    this: f.args.into_iter().next().unwrap(),
1065                    original_name: None,
1066                })))
1067            }
1068
1069            // CHARINDEX -> POSITION in PostgreSQL
1070            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1071            "CHARINDEX" if f.args.len() >= 2 => {
1072                let mut args = f.args;
1073                let substring = args.remove(0);
1074                let string = args.remove(0);
1075                Ok(Expression::Position(Box::new(
1076                    crate::expressions::PositionFunc {
1077                        substring,
1078                        string,
1079                        start: args.pop(),
1080                    },
1081                )))
1082            }
1083
1084            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1085            "GETDATE" => Ok(Expression::CurrentTimestamp(
1086                crate::expressions::CurrentTimestamp {
1087                    precision: None,
1088                    sysdate: false,
1089                },
1090            )),
1091
1092            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1093            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1094                crate::expressions::CurrentTimestamp {
1095                    precision: None,
1096                    sysdate: false,
1097                },
1098            )),
1099
1100            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1101            "NOW" => Ok(Expression::CurrentTimestamp(
1102                crate::expressions::CurrentTimestamp {
1103                    precision: None,
1104                    sysdate: false,
1105                },
1106            )),
1107
1108            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1109            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1110                "GEN_RANDOM_UUID".to_string(),
1111                vec![],
1112            )))),
1113
1114            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1115            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1116                "GEN_RANDOM_UUID".to_string(),
1117                vec![],
1118            )))),
1119
1120            // UNNEST is native to PostgreSQL
1121            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1122
1123            // GENERATE_SERIES is native to PostgreSQL
1124            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1125
1126            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1127            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1128                "SHA256".to_string(),
1129                f.args,
1130            )))),
1131
1132            // SHA2 -> SHA256/SHA512 based on length argument
1133            "SHA2" if f.args.len() == 2 => {
1134                // SHA2(data, length) -> SHA256/SHA384/SHA512
1135                let args = f.args;
1136                let data = args[0].clone();
1137                // Default to SHA256 - would need runtime inspection for exact mapping
1138                Ok(Expression::Function(Box::new(Function::new(
1139                    "SHA256".to_string(),
1140                    vec![data],
1141                ))))
1142            }
1143
1144            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1145            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1146
1147            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1148            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1149                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1150            ))),
1151            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1152                Function::new("LEVENSHTEIN".to_string(), f.args),
1153            ))),
1154
1155            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1156            "TRIM" if f.args.len() == 2 => {
1157                let value = f.args[0].clone();
1158                let chars = f.args[1].clone();
1159                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1160                    this: value,
1161                    characters: Some(chars),
1162                    position: crate::expressions::TrimPosition::Both,
1163                    sql_standard_syntax: true,
1164                    position_explicit: false,
1165                })))
1166            }
1167
1168            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1169            "DATEDIFF" if f.args.len() >= 2 => {
1170                let mut args = f.args;
1171                if args.len() == 2 {
1172                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1173                    let first = args.remove(0);
1174                    let second = args.remove(0);
1175                    Ok(Expression::Function(Box::new(Function::new(
1176                        "AGE".to_string(),
1177                        vec![first, second],
1178                    ))))
1179                } else {
1180                    // 3-arg form: DATEDIFF(unit, start, end)
1181                    let unit_expr = args.remove(0);
1182                    let start = args.remove(0);
1183                    let end_expr = args.remove(0);
1184
1185                    // Extract unit name from identifier or column
1186                    let unit_name = match &unit_expr {
1187                        Expression::Identifier(id) => id.name.to_uppercase(),
1188                        Expression::Column(col) if col.table.is_none() => {
1189                            col.name.name.to_uppercase()
1190                        }
1191                        _ => "DAY".to_string(),
1192                    };
1193
1194                    // Helper: CAST(expr AS TIMESTAMP)
1195                    let cast_ts = |e: Expression| -> Expression {
1196                        Expression::Cast(Box::new(Cast {
1197                            this: e,
1198                            to: DataType::Timestamp {
1199                                precision: None,
1200                                timezone: false,
1201                            },
1202                            trailing_comments: Vec::new(),
1203                            double_colon_syntax: false,
1204                            format: None,
1205                            default: None,
1206                        }))
1207                    };
1208
1209                    // Helper: CAST(expr AS BIGINT)
1210                    let cast_bigint = |e: Expression| -> Expression {
1211                        Expression::Cast(Box::new(Cast {
1212                            this: e,
1213                            to: DataType::BigInt { length: None },
1214                            trailing_comments: Vec::new(),
1215                            double_colon_syntax: false,
1216                            format: None,
1217                            default: None,
1218                        }))
1219                    };
1220
1221                    let end_ts = cast_ts(end_expr.clone());
1222                    let start_ts = cast_ts(start.clone());
1223
1224                    // Helper: end_ts - start_ts
1225                    let ts_diff = || -> Expression {
1226                        Expression::Sub(Box::new(BinaryOp::new(
1227                            cast_ts(end_expr.clone()),
1228                            cast_ts(start.clone()),
1229                        )))
1230                    };
1231
1232                    // Helper: AGE(end_ts, start_ts)
1233                    let age_call = || -> Expression {
1234                        Expression::Function(Box::new(Function::new(
1235                            "AGE".to_string(),
1236                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1237                        )))
1238                    };
1239
1240                    // Helper: EXTRACT(field FROM expr)
1241                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1242                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1243                    };
1244
1245                    // Helper: number literal
1246                    let num = |n: i64| -> Expression {
1247                        Expression::Literal(Literal::Number(n.to_string()))
1248                    };
1249
1250                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1251                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1252
1253                    let result = match unit_name.as_str() {
1254                        "MICROSECOND" => {
1255                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1256                            let epoch = extract(epoch_field, ts_diff());
1257                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1258                                epoch,
1259                                num(1000000),
1260                            ))))
1261                        }
1262                        "MILLISECOND" => {
1263                            let epoch = extract(epoch_field, ts_diff());
1264                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1265                        }
1266                        "SECOND" => {
1267                            let epoch = extract(epoch_field, ts_diff());
1268                            cast_bigint(epoch)
1269                        }
1270                        "MINUTE" => {
1271                            let epoch = extract(epoch_field, ts_diff());
1272                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1273                        }
1274                        "HOUR" => {
1275                            let epoch = extract(epoch_field, ts_diff());
1276                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1277                        }
1278                        "DAY" => {
1279                            let epoch = extract(epoch_field, ts_diff());
1280                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1281                        }
1282                        "WEEK" => {
1283                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1284                            let diff_parens = Expression::Paren(Box::new(Paren {
1285                                this: ts_diff(),
1286                                trailing_comments: Vec::new(),
1287                            }));
1288                            let days =
1289                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1290                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1291                        }
1292                        "MONTH" => {
1293                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1294                            let year_part =
1295                                extract(DateTimeField::Custom("year".to_string()), age_call());
1296                            let month_part =
1297                                extract(DateTimeField::Custom("month".to_string()), age_call());
1298                            let year_months =
1299                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1300                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1301                                year_months,
1302                                month_part,
1303                            ))))
1304                        }
1305                        "QUARTER" => {
1306                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1307                            let year_part =
1308                                extract(DateTimeField::Custom("year".to_string()), age_call());
1309                            let month_part =
1310                                extract(DateTimeField::Custom("month".to_string()), age_call());
1311                            let year_quarters =
1312                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1313                            let month_quarters =
1314                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1315                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1316                                year_quarters,
1317                                month_quarters,
1318                            ))))
1319                        }
1320                        "YEAR" => {
1321                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1322                            cast_bigint(extract(
1323                                DateTimeField::Custom("year".to_string()),
1324                                age_call(),
1325                            ))
1326                        }
1327                        _ => {
1328                            // Fallback: simple AGE
1329                            Expression::Function(Box::new(Function::new(
1330                                "AGE".to_string(),
1331                                vec![end_ts, start_ts],
1332                            )))
1333                        }
1334                    };
1335                    Ok(result)
1336                }
1337            }
1338
1339            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1340            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1341                let mut args = f.args;
1342                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1343                let start = args.remove(0);
1344                let end = args.remove(0);
1345                Ok(Expression::Function(Box::new(Function::new(
1346                    "AGE".to_string(),
1347                    vec![end, start],
1348                ))))
1349            }
1350
1351            // FROM_UNIXTIME -> TO_TIMESTAMP
1352            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1353                "TO_TIMESTAMP".to_string(),
1354                f.args,
1355            )))),
1356
1357            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1358            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1359                let arg = f.args.into_iter().next().unwrap();
1360                Ok(Expression::Function(Box::new(Function::new(
1361                    "DATE_PART".to_string(),
1362                    vec![Expression::string("epoch"), arg],
1363                ))))
1364            }
1365
1366            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1367            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1368                Ok(Expression::Function(Box::new(Function::new(
1369                    "DATE_PART".to_string(),
1370                    vec![
1371                        Expression::string("epoch"),
1372                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1373                            precision: None,
1374                            sysdate: false,
1375                        }),
1376                    ],
1377                ))))
1378            }
1379
1380            // DATEADD -> date + interval pattern
1381            "DATEADD" if f.args.len() == 3 => {
1382                // DATEADD(unit, count, date) -> date + interval 'count unit'
1383                // This is a simplified version - full impl would construct proper interval
1384                let mut args = f.args;
1385                let _unit = args.remove(0);
1386                let count = args.remove(0);
1387                let date = args.remove(0);
1388                Ok(Expression::Add(Box::new(BinaryOp {
1389                    left: date,
1390                    right: count,
1391                    left_comments: Vec::new(),
1392                    operator_comments: Vec::new(),
1393                    trailing_comments: Vec::new(),
1394                })))
1395            }
1396
1397            // INSTR -> POSITION (simplified)
1398            "INSTR" if f.args.len() >= 2 => {
1399                let mut args = f.args;
1400                let string = args.remove(0);
1401                let substring = args.remove(0);
1402                Ok(Expression::Position(Box::new(
1403                    crate::expressions::PositionFunc {
1404                        substring,
1405                        string,
1406                        start: args.pop(),
1407                    },
1408                )))
1409            }
1410
1411            // CONCAT_WS is native to PostgreSQL
1412            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1413
1414            // REGEXP_REPLACE: pass through without adding 'g' flag
1415            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1416            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1417                Ok(Expression::Function(Box::new(f)))
1418            }
1419            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1420            // If occurrence is 0 (global), append 'g' to flags
1421            "REGEXP_REPLACE" if f.args.len() == 6 => {
1422                let is_global = match &f.args[4] {
1423                    Expression::Literal(crate::expressions::Literal::Number(n)) => n == "0",
1424                    _ => false,
1425                };
1426                if is_global {
1427                    let subject = f.args[0].clone();
1428                    let pattern = f.args[1].clone();
1429                    let replacement = f.args[2].clone();
1430                    let position = f.args[3].clone();
1431                    let occurrence = f.args[4].clone();
1432                    let params = &f.args[5];
1433                    let mut flags =
1434                        if let Expression::Literal(crate::expressions::Literal::String(s)) = params
1435                        {
1436                            s.clone()
1437                        } else {
1438                            String::new()
1439                        };
1440                    if !flags.contains('g') {
1441                        flags.push('g');
1442                    }
1443                    Ok(Expression::Function(Box::new(Function::new(
1444                        "REGEXP_REPLACE".to_string(),
1445                        vec![
1446                            subject,
1447                            pattern,
1448                            replacement,
1449                            position,
1450                            occurrence,
1451                            Expression::Literal(crate::expressions::Literal::String(flags)),
1452                        ],
1453                    ))))
1454                } else {
1455                    Ok(Expression::Function(Box::new(f)))
1456                }
1457            }
1458            // Default: pass through
1459            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1460
1461            // Pass through everything else
1462            _ => Ok(Expression::Function(Box::new(f))),
1463        }
1464    }
1465
1466    fn transform_aggregate_function(
1467        &self,
1468        f: Box<crate::expressions::AggregateFunction>,
1469    ) -> Result<Expression> {
1470        let name_upper = f.name.to_uppercase();
1471        match name_upper.as_str() {
1472            // COUNT_IF -> SUM(CASE WHEN...)
1473            "COUNT_IF" if !f.args.is_empty() => {
1474                let condition = f.args.into_iter().next().unwrap();
1475                let case_expr = Expression::Case(Box::new(Case {
1476                    operand: None,
1477                    whens: vec![(condition, Expression::number(1))],
1478                    else_: Some(Expression::number(0)),
1479                    comments: Vec::new(),
1480                }));
1481                Ok(Expression::Sum(Box::new(AggFunc {
1482                    ignore_nulls: None,
1483                    having_max: None,
1484                    this: case_expr,
1485                    distinct: f.distinct,
1486                    filter: f.filter,
1487                    order_by: Vec::new(),
1488                    name: None,
1489                    limit: None,
1490                })))
1491            }
1492
1493            // GROUP_CONCAT -> STRING_AGG
1494            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1495                Function::new("STRING_AGG".to_string(), f.args),
1496            ))),
1497
1498            // STDEV -> STDDEV in PostgreSQL
1499            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1500                ignore_nulls: None,
1501                having_max: None,
1502                this: f.args.into_iter().next().unwrap(),
1503                distinct: f.distinct,
1504                filter: f.filter,
1505                order_by: Vec::new(),
1506                name: None,
1507                limit: None,
1508            }))),
1509
1510            // STDEVP -> STDDEV_POP in PostgreSQL
1511            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1512                ignore_nulls: None,
1513                having_max: None,
1514                this: f.args.into_iter().next().unwrap(),
1515                distinct: f.distinct,
1516                filter: f.filter,
1517                order_by: Vec::new(),
1518                name: None,
1519                limit: None,
1520            }))),
1521
1522            // VAR -> VAR_SAMP in PostgreSQL
1523            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1524                ignore_nulls: None,
1525                having_max: None,
1526                this: f.args.into_iter().next().unwrap(),
1527                distinct: f.distinct,
1528                filter: f.filter,
1529                order_by: Vec::new(),
1530                name: None,
1531                limit: None,
1532            }))),
1533
1534            // VARP -> VAR_POP in PostgreSQL
1535            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1536                ignore_nulls: None,
1537                having_max: None,
1538                this: f.args.into_iter().next().unwrap(),
1539                distinct: f.distinct,
1540                filter: f.filter,
1541                order_by: Vec::new(),
1542                name: None,
1543                limit: None,
1544            }))),
1545
1546            // BIT_AND is native to PostgreSQL
1547            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1548
1549            // BIT_OR is native to PostgreSQL
1550            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1551
1552            // BIT_XOR is native to PostgreSQL
1553            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1554
1555            // BOOL_AND is native to PostgreSQL
1556            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1557
1558            // BOOL_OR is native to PostgreSQL
1559            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1560
1561            // VARIANCE -> VAR_SAMP in PostgreSQL
1562            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1563                ignore_nulls: None,
1564                having_max: None,
1565                this: f.args.into_iter().next().unwrap(),
1566                distinct: f.distinct,
1567                filter: f.filter,
1568                order_by: Vec::new(),
1569                name: None,
1570                limit: None,
1571            }))),
1572
1573            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1574            "LOGICAL_OR" if !f.args.is_empty() => {
1575                let mut new_agg = f.clone();
1576                new_agg.name = "BOOL_OR".to_string();
1577                Ok(Expression::AggregateFunction(new_agg))
1578            }
1579
1580            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1581            "LOGICAL_AND" if !f.args.is_empty() => {
1582                let mut new_agg = f.clone();
1583                new_agg.name = "BOOL_AND".to_string();
1584                Ok(Expression::AggregateFunction(new_agg))
1585            }
1586
1587            // Pass through everything else
1588            _ => Ok(Expression::AggregateFunction(f)),
1589        }
1590    }
1591}
1592
1593#[cfg(test)]
1594mod tests {
1595    use super::*;
1596    use crate::dialects::Dialect;
1597
1598    fn transpile_to_postgres(sql: &str) -> String {
1599        let dialect = Dialect::get(DialectType::Generic);
1600        let result = dialect
1601            .transpile_to(sql, DialectType::PostgreSQL)
1602            .expect("Transpile failed");
1603        result[0].clone()
1604    }
1605
1606    #[test]
1607    fn test_ifnull_to_coalesce() {
1608        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1609        assert!(
1610            result.contains("COALESCE"),
1611            "Expected COALESCE, got: {}",
1612            result
1613        );
1614    }
1615
1616    #[test]
1617    fn test_nvl_to_coalesce() {
1618        let result = transpile_to_postgres("SELECT NVL(a, b)");
1619        assert!(
1620            result.contains("COALESCE"),
1621            "Expected COALESCE, got: {}",
1622            result
1623        );
1624    }
1625
1626    #[test]
1627    fn test_rand_to_random() {
1628        let result = transpile_to_postgres("SELECT RAND()");
1629        assert!(
1630            result.contains("RANDOM"),
1631            "Expected RANDOM, got: {}",
1632            result
1633        );
1634    }
1635
1636    #[test]
1637    fn test_basic_select() {
1638        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1639        assert!(result.contains("SELECT"));
1640        assert!(result.contains("FROM users"));
1641    }
1642
1643    #[test]
1644    fn test_len_to_length() {
1645        let result = transpile_to_postgres("SELECT LEN(name)");
1646        assert!(
1647            result.contains("LENGTH"),
1648            "Expected LENGTH, got: {}",
1649            result
1650        );
1651    }
1652
1653    #[test]
1654    fn test_getdate_to_current_timestamp() {
1655        let result = transpile_to_postgres("SELECT GETDATE()");
1656        assert!(
1657            result.contains("CURRENT_TIMESTAMP"),
1658            "Expected CURRENT_TIMESTAMP, got: {}",
1659            result
1660        );
1661    }
1662
1663    #[test]
1664    fn test_substr_to_substring() {
1665        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1666        assert!(
1667            result.contains("SUBSTRING"),
1668            "Expected SUBSTRING, got: {}",
1669            result
1670        );
1671    }
1672
1673    #[test]
1674    fn test_group_concat_to_string_agg() {
1675        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1676        assert!(
1677            result.contains("STRING_AGG"),
1678            "Expected STRING_AGG, got: {}",
1679            result
1680        );
1681    }
1682
1683    #[test]
1684    fn test_double_quote_identifiers() {
1685        // PostgreSQL uses double quotes for identifiers
1686        let dialect = PostgresDialect;
1687        let config = dialect.generator_config();
1688        assert_eq!(config.identifier_quote, '"');
1689    }
1690
1691    #[test]
1692    fn test_char_length_to_length() {
1693        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1694        assert!(
1695            result.contains("LENGTH"),
1696            "Expected LENGTH, got: {}",
1697            result
1698        );
1699    }
1700
1701    #[test]
1702    fn test_character_length_to_length() {
1703        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1704        assert!(
1705            result.contains("LENGTH"),
1706            "Expected LENGTH, got: {}",
1707            result
1708        );
1709    }
1710
1711    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1712    fn identity_postgres(sql: &str) -> String {
1713        let dialect = Dialect::get(DialectType::PostgreSQL);
1714        let exprs = dialect.parse(sql).expect("Parse failed");
1715        let transformed = dialect
1716            .transform(exprs[0].clone())
1717            .expect("Transform failed");
1718        dialect.generate(&transformed).expect("Generate failed")
1719    }
1720
1721    #[test]
1722    fn test_json_extract_with_column_path() {
1723        // When the path is a column reference (not a literal), should use function form
1724        let result = identity_postgres("json_data.data -> field_ids.field_id");
1725        assert!(
1726            result.contains("JSON_EXTRACT_PATH"),
1727            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1728            result
1729        );
1730    }
1731
1732    #[test]
1733    fn test_json_extract_scalar_with_negative_index() {
1734        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1735        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1736        assert!(
1737            result.contains("JSON_EXTRACT_PATH_TEXT"),
1738            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1739            result
1740        );
1741        // The first -> should still be arrow syntax since 'duration' is a string literal
1742        assert!(
1743            result.contains("->"),
1744            "Expected -> for string literal path, got: {}",
1745            result
1746        );
1747    }
1748
1749    #[test]
1750    fn test_json_extract_with_string_literal() {
1751        // When the path is a string literal, should keep arrow syntax
1752        let result = identity_postgres("data -> 'key'");
1753        assert!(
1754            result.contains("->"),
1755            "Expected -> for string literal path, got: {}",
1756            result
1757        );
1758        assert!(
1759            !result.contains("JSON_EXTRACT_PATH"),
1760            "Should NOT use function form for string literal, got: {}",
1761            result
1762        );
1763    }
1764}