Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField, Expression,
22    ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec, Join, JoinKind, Literal,
23    Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
34            this: expr,
35            trailing_comments: Vec::new(),
36        })),
37        Expression::JsonExtractScalar(f) if f.arrow_syntax => Expression::Paren(Box::new(Paren {
38            this: expr,
39            trailing_comments: Vec::new(),
40        })),
41        _ => expr,
42    }
43}
44
45/// PostgreSQL dialect
46pub struct PostgresDialect;
47
48impl DialectImpl for PostgresDialect {
49    fn dialect_type(&self) -> DialectType {
50        DialectType::PostgreSQL
51    }
52
53    fn tokenizer_config(&self) -> TokenizerConfig {
54        use crate::tokens::TokenType;
55        let mut config = TokenizerConfig::default();
56        // PostgreSQL supports $$ string literals (heredoc strings)
57        config.quotes.insert("$$".to_string(), "$$".to_string());
58        // PostgreSQL uses double quotes for identifiers
59        config.identifiers.insert('"', '"');
60        // Nested comments supported
61        config.nested_comments = true;
62        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
63        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
64        config
65            .keywords
66            .insert("EXEC".to_string(), TokenType::Command);
67        config
68    }
69
70    fn generator_config(&self) -> GeneratorConfig {
71        use crate::generator::IdentifierQuoteStyle;
72        GeneratorConfig {
73            identifier_quote: '"',
74            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
75            dialect: Some(DialectType::PostgreSQL),
76            // PostgreSQL uses TIMESTAMPTZ shorthand
77            tz_to_with_time_zone: false,
78            // PostgreSQL prefers INTERVAL '1 day' syntax
79            single_string_interval: true,
80            // TABLESAMPLE uses REPEATABLE in PostgreSQL
81            tablesample_seed_keyword: "REPEATABLE",
82            // PostgreSQL doesn't support NVL2
83            nvl2_supported: false,
84            // PostgreSQL uses $ for parameters
85            parameter_token: "$",
86            // PostgreSQL uses % for named placeholders
87            named_placeholder_token: "%",
88            // PostgreSQL supports SELECT INTO
89            supports_select_into: true,
90            // PostgreSQL: USING btree(col) without space before parens
91            index_using_no_space: true,
92            // PostgreSQL supports UNLOGGED tables
93            supports_unlogged_tables: true,
94            // PostgreSQL doesn't support multi-arg DISTINCT
95            multi_arg_distinct: false,
96            // PostgreSQL uses ANY (subquery) with space
97            quantified_no_paren_space: false,
98            // PostgreSQL supports window EXCLUDE clause
99            supports_window_exclude: true,
100            // PostgreSQL normalizes single-bound window frames to BETWEEN form
101            normalize_window_frame_between: true,
102            // PostgreSQL COPY doesn't use INTO keyword
103            copy_has_into_keyword: false,
104            // PostgreSQL ARRAY_SIZE requires dimension argument
105            array_size_dim_required: Some(true),
106            // PostgreSQL supports BETWEEN flags
107            supports_between_flags: true,
108            // PostgreSQL doesn't support hints
109            join_hints: false,
110            table_hints: false,
111            query_hints: false,
112            // PostgreSQL supports locking reads
113            locking_reads_supported: true,
114            // PostgreSQL doesn't rename tables with DB
115            rename_table_with_db: false,
116            // PostgreSQL can implement array any
117            can_implement_array_any: true,
118            // PostgreSQL ARRAY_CONCAT is not var-len
119            array_concat_is_var_len: false,
120            // PostgreSQL doesn't support MEDIAN
121            supports_median: false,
122            // PostgreSQL requires JSON type for extraction
123            json_type_required_for_extraction: true,
124            // PostgreSQL LIKE property inside schema
125            like_property_inside_schema: true,
126            ..Default::default()
127        }
128    }
129
130    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
131        match expr {
132            // ============================================
133            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
134            // These are handled specially - transform DataType variants
135            // ============================================
136            Expression::DataType(dt) => self.transform_data_type(dt),
137
138            // ============================================
139            // NULL HANDLING
140            // ============================================
141            // IFNULL -> COALESCE in PostgreSQL
142            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
143                original_name: None,
144                expressions: vec![f.this, f.expression],
145            }))),
146
147            // NVL -> COALESCE in PostgreSQL
148            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
149                original_name: None,
150                expressions: vec![f.this, f.expression],
151            }))),
152
153            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
154            // so it outputs as COALESCE instead of the original function name
155            Expression::Coalesce(mut f) => {
156                f.original_name = None;
157                Ok(Expression::Coalesce(f))
158            }
159
160            // ============================================
161            // CAST OPERATIONS
162            // ============================================
163            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
164            Expression::TryCast(c) => Ok(Expression::Cast(c)),
165
166            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
167            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
168
169            // ============================================
170            // RANDOM
171            // ============================================
172            // RAND -> RANDOM in PostgreSQL
173            Expression::Rand(r) => {
174                // PostgreSQL's RANDOM() doesn't take a seed argument
175                let _ = r.seed; // Ignore seed
176                Ok(Expression::Random(crate::expressions::Random))
177            }
178
179            // ============================================
180            // UUID
181            // ============================================
182            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
183            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
184                "GEN_RANDOM_UUID".to_string(),
185                vec![],
186            )))),
187
188            // ============================================
189            // ARRAY OPERATIONS
190            // ============================================
191            // EXPLODE -> UNNEST in PostgreSQL
192            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
193                crate::expressions::UnnestFunc {
194                    this: f.this,
195                    expressions: Vec::new(),
196                    with_ordinality: false,
197                    alias: None,
198                    offset_alias: None,
199                },
200            ))),
201
202            // ExplodeOuter -> UNNEST in PostgreSQL
203            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
204                crate::expressions::UnnestFunc {
205                    this: f.this,
206                    expressions: Vec::new(),
207                    with_ordinality: false,
208                    alias: None,
209                    offset_alias: None,
210                },
211            ))),
212
213            // ArrayConcat -> ARRAY_CAT in PostgreSQL
214            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
215                "ARRAY_CAT".to_string(),
216                f.expressions,
217            )))),
218
219            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
220            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
221                "ARRAY_PREPEND".to_string(),
222                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
223            )))),
224
225            // ============================================
226            // BITWISE OPERATIONS
227            // ============================================
228            // BitwiseXor -> # operator in PostgreSQL
229            Expression::BitwiseXor(f) => {
230                // Use a special marker that generator will recognize
231                Ok(Expression::Function(Box::new(Function::new(
232                    "__PG_BITWISE_XOR__".to_string(),
233                    vec![f.left, f.right],
234                ))))
235            }
236
237            // BitwiseAndAgg -> BIT_AND
238            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
239                "BIT_AND".to_string(),
240                vec![f.this],
241            )))),
242
243            // BitwiseOrAgg -> BIT_OR
244            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
245                "BIT_OR".to_string(),
246                vec![f.this],
247            )))),
248
249            // BitwiseXorAgg -> BIT_XOR
250            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
251                "BIT_XOR".to_string(),
252                vec![f.this],
253            )))),
254
255            // ============================================
256            // BOOLEAN AGGREGATES
257            // ============================================
258            // LogicalAnd -> BOOL_AND
259            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
260                "BOOL_AND".to_string(),
261                vec![f.this],
262            )))),
263
264            // LogicalOr -> BOOL_OR
265            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
266                "BOOL_OR".to_string(),
267                vec![f.this],
268            )))),
269
270            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
271            Expression::Xor(f) => {
272                if let (Some(a), Some(b)) = (f.this, f.expression) {
273                    Ok(Expression::Neq(Box::new(BinaryOp {
274                        left: *a,
275                        right: *b,
276                        left_comments: Vec::new(),
277                        operator_comments: Vec::new(),
278                        trailing_comments: Vec::new(),
279                    })))
280                } else {
281                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
282                }
283            }
284
285            // ============================================
286            // ARRAY OPERATORS
287            // ============================================
288            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
289            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
290            Expression::ArrayContainedBy(op) => {
291                Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
292                    left: op.right,
293                    right: op.left,
294                    left_comments: Vec::new(),
295                    operator_comments: Vec::new(),
296                    trailing_comments: Vec::new(),
297                })))
298            }
299
300            // ============================================
301            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
302            // ============================================
303            // RegexpLike -> keep as-is, generator handles ~ operator output
304            Expression::RegexpLike(f) => {
305                // Generator will output as: expr ~ pattern
306                Ok(Expression::RegexpLike(f))
307            }
308
309            // ============================================
310            // DATE/TIME FUNCTIONS
311            // ============================================
312            // DateAdd -> date + INTERVAL in PostgreSQL
313            Expression::DateAdd(f) => {
314                let interval_expr = Expression::Interval(Box::new(Interval {
315                    this: Some(f.interval),
316                    unit: Some(IntervalUnitSpec::Simple {
317                        unit: f.unit,
318                        use_plural: false,
319                    }),
320                }));
321                Ok(Expression::Add(Box::new(BinaryOp {
322                    left: f.this,
323                    right: interval_expr,
324                    left_comments: Vec::new(),
325                    operator_comments: Vec::new(),
326                    trailing_comments: Vec::new(),
327                })))
328            }
329
330            // DateSub -> date - INTERVAL in PostgreSQL
331            Expression::DateSub(f) => {
332                let interval_expr = Expression::Interval(Box::new(Interval {
333                    this: Some(f.interval),
334                    unit: Some(IntervalUnitSpec::Simple {
335                        unit: f.unit,
336                        use_plural: false,
337                    }),
338                }));
339                Ok(Expression::Sub(Box::new(BinaryOp {
340                    left: f.this,
341                    right: interval_expr,
342                    left_comments: Vec::new(),
343                    operator_comments: Vec::new(),
344                    trailing_comments: Vec::new(),
345                })))
346            }
347
348            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
349            Expression::DateDiff(f) => {
350                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
351                // matching the 3-arg string-based DATEDIFF handler below
352                let unit = f.unit.unwrap_or(IntervalUnit::Day);
353
354                // Helper: CAST(expr AS TIMESTAMP)
355                let cast_ts = |e: Expression| -> Expression {
356                    Expression::Cast(Box::new(Cast {
357                        this: e,
358                        to: DataType::Timestamp {
359                            precision: None,
360                            timezone: false,
361                        },
362                        trailing_comments: Vec::new(),
363                        double_colon_syntax: false,
364                        format: None,
365                        default: None,
366                    }))
367                };
368
369                // Helper: CAST(expr AS BIGINT)
370                let cast_bigint = |e: Expression| -> Expression {
371                    Expression::Cast(Box::new(Cast {
372                        this: e,
373                        to: DataType::BigInt { length: None },
374                        trailing_comments: Vec::new(),
375                        double_colon_syntax: false,
376                        format: None,
377                        default: None,
378                    }))
379                };
380
381                // Clone end/start for reuse
382                let end_expr = f.this;
383                let start = f.expression;
384
385                // Helper: end_ts - start_ts
386                let ts_diff = || -> Expression {
387                    Expression::Sub(Box::new(BinaryOp::new(
388                        cast_ts(end_expr.clone()),
389                        cast_ts(start.clone()),
390                    )))
391                };
392
393                // Helper: AGE(end_ts, start_ts)
394                let age_call = || -> Expression {
395                    Expression::Function(Box::new(Function::new(
396                        "AGE".to_string(),
397                        vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
398                    )))
399                };
400
401                // Helper: EXTRACT(field FROM expr)
402                let extract = |field: DateTimeField, from: Expression| -> Expression {
403                    Expression::Extract(Box::new(ExtractFunc { this: from, field }))
404                };
405
406                // Helper: number literal
407                let num =
408                    |n: i64| -> Expression { Expression::Literal(Literal::Number(n.to_string())) };
409
410                let epoch_field = DateTimeField::Custom("epoch".to_string());
411
412                let result = match unit {
413                    IntervalUnit::Nanosecond => {
414                        let epoch = extract(epoch_field.clone(), ts_diff());
415                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
416                            epoch,
417                            num(1000000000),
418                        ))))
419                    }
420                    IntervalUnit::Microsecond => {
421                        let epoch = extract(epoch_field, ts_diff());
422                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
423                            epoch,
424                            num(1000000),
425                        ))))
426                    }
427                    IntervalUnit::Millisecond => {
428                        let epoch = extract(epoch_field, ts_diff());
429                        cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
430                    }
431                    IntervalUnit::Second => {
432                        let epoch = extract(epoch_field, ts_diff());
433                        cast_bigint(epoch)
434                    }
435                    IntervalUnit::Minute => {
436                        let epoch = extract(epoch_field, ts_diff());
437                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
438                    }
439                    IntervalUnit::Hour => {
440                        let epoch = extract(epoch_field, ts_diff());
441                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
442                    }
443                    IntervalUnit::Day => {
444                        let epoch = extract(epoch_field, ts_diff());
445                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
446                    }
447                    IntervalUnit::Week => {
448                        let diff_parens = Expression::Paren(Box::new(Paren {
449                            this: ts_diff(),
450                            trailing_comments: Vec::new(),
451                        }));
452                        let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
453                        cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
454                    }
455                    IntervalUnit::Month => {
456                        let year_part =
457                            extract(DateTimeField::Custom("year".to_string()), age_call());
458                        let month_part =
459                            extract(DateTimeField::Custom("month".to_string()), age_call());
460                        let year_months =
461                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
462                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
463                            year_months,
464                            month_part,
465                        ))))
466                    }
467                    IntervalUnit::Quarter => {
468                        let year_part =
469                            extract(DateTimeField::Custom("year".to_string()), age_call());
470                        let month_part =
471                            extract(DateTimeField::Custom("month".to_string()), age_call());
472                        let year_quarters =
473                            Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
474                        let month_quarters =
475                            Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
476                        cast_bigint(Expression::Add(Box::new(BinaryOp::new(
477                            year_quarters,
478                            month_quarters,
479                        ))))
480                    }
481                    IntervalUnit::Year => cast_bigint(extract(
482                        DateTimeField::Custom("year".to_string()),
483                        age_call(),
484                    )),
485                };
486                Ok(result)
487            }
488
489            // UnixToTime -> TO_TIMESTAMP
490            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
491                "TO_TIMESTAMP".to_string(),
492                vec![*f.this],
493            )))),
494
495            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
496            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
497                "DATE_PART".to_string(),
498                vec![Expression::string("epoch"), f.this],
499            )))),
500
501            // StrToTime -> TO_TIMESTAMP in PostgreSQL
502            Expression::ToTimestamp(f) => {
503                let mut args = vec![f.this];
504                if let Some(fmt) = f.format {
505                    args.push(fmt);
506                }
507                Ok(Expression::Function(Box::new(Function::new(
508                    "TO_TIMESTAMP".to_string(),
509                    args,
510                ))))
511            }
512
513            // StrToDate -> TO_DATE in PostgreSQL
514            Expression::ToDate(f) => {
515                let mut args = vec![f.this];
516                if let Some(fmt) = f.format {
517                    args.push(fmt);
518                }
519                Ok(Expression::Function(Box::new(Function::new(
520                    "TO_DATE".to_string(),
521                    args,
522                ))))
523            }
524
525            // TimestampTrunc -> DATE_TRUNC
526            Expression::TimestampTrunc(f) => {
527                // Convert DateTimeField to string expression for DATE_TRUNC
528                let unit_str = format!("{:?}", f.unit).to_lowercase();
529                let args = vec![Expression::string(&unit_str), f.this];
530                Ok(Expression::Function(Box::new(Function::new(
531                    "DATE_TRUNC".to_string(),
532                    args,
533                ))))
534            }
535
536            // TimeFromParts -> MAKE_TIME
537            Expression::TimeFromParts(f) => {
538                let mut args = Vec::new();
539                if let Some(h) = f.hour {
540                    args.push(*h);
541                }
542                if let Some(m) = f.min {
543                    args.push(*m);
544                }
545                if let Some(s) = f.sec {
546                    args.push(*s);
547                }
548                Ok(Expression::Function(Box::new(Function::new(
549                    "MAKE_TIME".to_string(),
550                    args,
551                ))))
552            }
553
554            // TimestampFromParts -> MAKE_TIMESTAMP
555            Expression::MakeTimestamp(f) => {
556                // MakeTimestampFunc has direct Expression fields, not Options
557                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
558                Ok(Expression::Function(Box::new(Function::new(
559                    "MAKE_TIMESTAMP".to_string(),
560                    args,
561                ))))
562            }
563
564            // ============================================
565            // STRING FUNCTIONS
566            // ============================================
567            // StringAgg is native to PostgreSQL - keep as-is
568            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
569
570            // GroupConcat -> STRING_AGG in PostgreSQL
571            Expression::GroupConcat(f) => {
572                let mut args = vec![f.this.clone()];
573                if let Some(sep) = f.separator.clone() {
574                    args.push(sep);
575                } else {
576                    args.push(Expression::string(","));
577                }
578                Ok(Expression::Function(Box::new(Function::new(
579                    "STRING_AGG".to_string(),
580                    args,
581                ))))
582            }
583
584            // StrPosition -> POSITION function
585            Expression::Position(f) => {
586                // PostgreSQL: POSITION(substring IN string)
587                // Keep as Position, generator handles it
588                Ok(Expression::Position(f))
589            }
590
591            // ============================================
592            // AGGREGATE FUNCTIONS
593            // ============================================
594            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
595            Expression::CountIf(f) => {
596                let case_expr = Expression::Case(Box::new(Case {
597                    operand: None,
598                    whens: vec![(f.this.clone(), Expression::number(1))],
599                    else_: Some(Expression::number(0)),
600                    comments: Vec::new(),
601                }));
602                Ok(Expression::Sum(Box::new(AggFunc {
603                    ignore_nulls: None,
604                    having_max: None,
605                    this: case_expr,
606                    distinct: f.distinct,
607                    filter: f.filter,
608                    order_by: Vec::new(),
609                    name: None,
610                    limit: None,
611                })))
612            }
613
614            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
615            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
616
617            // Variance -> VAR_SAMP in PostgreSQL
618            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
619                "VAR_SAMP".to_string(),
620                vec![f.this],
621            )))),
622
623            // VarPop -> VAR_POP in PostgreSQL
624            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
625                "VAR_POP".to_string(),
626                vec![f.this],
627            )))),
628
629            // ============================================
630            // JSON FUNCTIONS
631            // ============================================
632            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
633            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
634            Expression::JsonExtract(mut f) => {
635                // Only use arrow syntax for simple literal paths (string or non-negative number)
636                // Complex expressions like column references should use function form
637                f.arrow_syntax = Self::is_simple_json_path(&f.path);
638                Ok(Expression::JsonExtract(f))
639            }
640
641            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
642            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
643            // #>> (hash_arrow_syntax) stays as #>>
644            Expression::JsonExtractScalar(mut f) => {
645                if !f.hash_arrow_syntax {
646                    // Only use arrow syntax for simple literal paths (string or non-negative number)
647                    // Complex expressions like Neg(-1) should use function form
648                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
649                }
650                Ok(Expression::JsonExtractScalar(f))
651            }
652
653            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
654
655            // JSONObjectAgg -> JSON_OBJECT_AGG
656            Expression::JsonObjectAgg(f) => {
657                // JsonObjectAggFunc has key and value as Expression, not Option
658                let args = vec![f.key, f.value];
659                Ok(Expression::Function(Box::new(Function::new(
660                    "JSON_OBJECT_AGG".to_string(),
661                    args,
662                ))))
663            }
664
665            // JSONArrayAgg -> JSON_AGG
666            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
667                "JSON_AGG".to_string(),
668                vec![f.this],
669            )))),
670
671            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
672            Expression::JSONPathRoot(_) => Ok(Expression::Literal(Literal::String(String::new()))),
673
674            // ============================================
675            // MISC FUNCTIONS
676            // ============================================
677            // IntDiv -> DIV in PostgreSQL
678            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
679                "DIV".to_string(),
680                vec![f.this, f.expression],
681            )))),
682
683            // Unicode -> ASCII in PostgreSQL
684            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
685                "ASCII".to_string(),
686                vec![f.this],
687            )))),
688
689            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
690            Expression::LastDay(f) => {
691                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
692                let truncated = Expression::Function(Box::new(Function::new(
693                    "DATE_TRUNC".to_string(),
694                    vec![Expression::string("month"), f.this.clone()],
695                )));
696                let plus_month = Expression::Add(Box::new(BinaryOp {
697                    left: truncated,
698                    right: Expression::Interval(Box::new(Interval {
699                        this: Some(Expression::string("1")),
700                        unit: Some(IntervalUnitSpec::Simple {
701                            unit: IntervalUnit::Month,
702                            use_plural: false,
703                        }),
704                    })),
705                    left_comments: Vec::new(),
706                    operator_comments: Vec::new(),
707                    trailing_comments: Vec::new(),
708                }));
709                let minus_day = Expression::Sub(Box::new(BinaryOp {
710                    left: plus_month,
711                    right: Expression::Interval(Box::new(Interval {
712                        this: Some(Expression::string("1")),
713                        unit: Some(IntervalUnitSpec::Simple {
714                            unit: IntervalUnit::Day,
715                            use_plural: false,
716                        }),
717                    })),
718                    left_comments: Vec::new(),
719                    operator_comments: Vec::new(),
720                    trailing_comments: Vec::new(),
721                }));
722                Ok(Expression::Cast(Box::new(Cast {
723                    this: minus_day,
724                    to: DataType::Date,
725                    trailing_comments: Vec::new(),
726                    double_colon_syntax: true, // Use PostgreSQL :: syntax
727                    format: None,
728                    default: None,
729                })))
730            }
731
732            // GenerateSeries is native to PostgreSQL
733            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
734
735            // ExplodingGenerateSeries -> GENERATE_SERIES
736            Expression::ExplodingGenerateSeries(f) => {
737                let mut args = vec![f.start, f.stop];
738                if let Some(step) = f.step {
739                    args.push(step); // step is Expression, not Box<Expression>
740                }
741                Ok(Expression::Function(Box::new(Function::new(
742                    "GENERATE_SERIES".to_string(),
743                    args,
744                ))))
745            }
746
747            // ============================================
748            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
749            // ============================================
750            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
751            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
752                name: "CURRENT_TIMESTAMP".to_string(),
753                args: vec![],
754                distinct: false,
755                trailing_comments: vec![],
756                use_bracket_syntax: false,
757                no_parens: true,
758                quoted: false,
759                span: None,
760            }))),
761
762            // CurrentUser -> CURRENT_USER (no parens)
763            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
764                "CURRENT_USER".to_string(),
765                vec![],
766            )))),
767
768            // CurrentDate -> CURRENT_DATE (no parens)
769            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
770                name: "CURRENT_DATE".to_string(),
771                args: vec![],
772                distinct: false,
773                trailing_comments: vec![],
774                use_bracket_syntax: false,
775                no_parens: true,
776                quoted: false,
777                span: None,
778            }))),
779
780            // ============================================
781            // JOIN TRANSFORMATIONS
782            // ============================================
783            // CROSS APPLY -> INNER JOIN LATERAL ... ON TRUE in PostgreSQL
784            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
785                Ok(Expression::Join(Box::new(Join {
786                    this: join.this,
787                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
788                    using: join.using,
789                    kind: JoinKind::CrossApply,
790                    use_inner_keyword: false,
791                    use_outer_keyword: false,
792                    deferred_condition: false,
793                    join_hint: None,
794                    match_condition: None,
795                    pivots: join.pivots,
796                    comments: join.comments,
797                    nesting_group: 0,
798                    directed: false,
799                })))
800            }
801
802            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
803            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
804                Ok(Expression::Join(Box::new(Join {
805                    this: join.this,
806                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
807                    using: join.using,
808                    kind: JoinKind::OuterApply,
809                    use_inner_keyword: false,
810                    use_outer_keyword: false,
811                    deferred_condition: false,
812                    join_hint: None,
813                    match_condition: None,
814                    pivots: join.pivots,
815                    comments: join.comments,
816                    nesting_group: 0,
817                    directed: false,
818                })))
819            }
820
821            // ============================================
822            // GENERIC FUNCTION TRANSFORMATIONS
823            // ============================================
824            Expression::Function(f) => self.transform_function(*f),
825
826            // Generic aggregate function transformations
827            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
828
829            // ===== Context-aware JSON arrow wrapping =====
830            // When JSON arrow expressions appear in Binary/In/Not contexts,
831            // they need to be wrapped in parentheses for correct precedence.
832            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
833
834            // Binary operators that need JSON wrapping
835            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
836                left: wrap_if_json_arrow(op.left),
837                right: wrap_if_json_arrow(op.right),
838                ..*op
839            }))),
840            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
841                left: wrap_if_json_arrow(op.left),
842                right: wrap_if_json_arrow(op.right),
843                ..*op
844            }))),
845            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
846                left: wrap_if_json_arrow(op.left),
847                right: wrap_if_json_arrow(op.right),
848                ..*op
849            }))),
850            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
851                left: wrap_if_json_arrow(op.left),
852                right: wrap_if_json_arrow(op.right),
853                ..*op
854            }))),
855            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
856                left: wrap_if_json_arrow(op.left),
857                right: wrap_if_json_arrow(op.right),
858                ..*op
859            }))),
860            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
861                left: wrap_if_json_arrow(op.left),
862                right: wrap_if_json_arrow(op.right),
863                ..*op
864            }))),
865
866            // In expression - wrap the this part if it's JSON arrow
867            Expression::In(mut i) => {
868                i.this = wrap_if_json_arrow(i.this);
869                Ok(Expression::In(i))
870            }
871
872            // Not expression - wrap the this part if it's JSON arrow
873            Expression::Not(mut n) => {
874                n.this = wrap_if_json_arrow(n.this);
875                Ok(Expression::Not(n))
876            }
877
878            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
879            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
880            Expression::Merge(m) => Ok(Expression::Merge(m)),
881
882            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
883            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
884                // Convert path from bracketed format to simple key
885                // e.g., '["fr''uit"]' -> 'fr''uit'
886                let path = match *je.expression {
887                    Expression::Literal(Literal::String(s)) => {
888                        // Strip bracketed JSON path format: ["key"] -> key
889                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
890                            s[2..s.len() - 2].to_string()
891                        } else {
892                            s
893                        };
894                        Expression::Literal(Literal::String(cleaned))
895                    }
896                    other => other,
897                };
898                Ok(Expression::Function(Box::new(Function::new(
899                    "JSON_EXTRACT_PATH".to_string(),
900                    vec![*je.this, path],
901                ))))
902            }
903
904            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
905            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
906                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
907                    this: t.this,
908                    characters: t.characters,
909                    position: t.position,
910                    sql_standard_syntax: true,
911                    position_explicit: t.position_explicit,
912                })))
913            }
914
915            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
916            Expression::Literal(Literal::ByteString(s)) => Ok(Expression::Cast(Box::new(Cast {
917                this: Expression::Literal(Literal::EscapeString(s)),
918                to: DataType::VarBinary { length: None },
919                trailing_comments: Vec::new(),
920                double_colon_syntax: false,
921                format: None,
922                default: None,
923            }))),
924
925            // Pass through everything else
926            _ => Ok(expr),
927        }
928    }
929}
930
931impl PostgresDialect {
932    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
933    /// Simple paths can use arrow syntax (->>) in PostgreSQL
934    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
935    fn is_simple_json_path(path: &Expression) -> bool {
936        match path {
937            // String literals are always simple
938            Expression::Literal(Literal::String(_)) => true,
939            // Non-negative integer literals are simple
940            Expression::Literal(Literal::Number(n)) => {
941                // Check if it's non-negative
942                !n.starts_with('-')
943            }
944            // JSONPath expressions are simple (they're already parsed paths)
945            Expression::JSONPath(_) => true,
946            // Everything else (Neg, function calls, etc.) is complex
947            _ => false,
948        }
949    }
950
951    /// Transform data types according to PostgreSQL TYPE_MAPPING
952    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
953        let transformed = match dt {
954            // TINYINT -> SMALLINT
955            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
956
957            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
958            DataType::Float { .. } => DataType::Custom {
959                name: "DOUBLE PRECISION".to_string(),
960            },
961
962            // DOUBLE -> DOUBLE PRECISION
963            DataType::Double { .. } => DataType::Custom {
964                name: "DOUBLE PRECISION".to_string(),
965            },
966
967            // BINARY -> BYTEA (handled by generator which preserves length)
968            DataType::Binary { .. } => dt,
969
970            // VARBINARY -> BYTEA (handled by generator which preserves length)
971            DataType::VarBinary { .. } => dt,
972
973            // BLOB -> BYTEA
974            DataType::Blob => DataType::Custom {
975                name: "BYTEA".to_string(),
976            },
977
978            // Custom type normalizations
979            DataType::Custom { ref name } => {
980                let upper = name.to_uppercase();
981                match upper.as_str() {
982                    // INT8 -> BIGINT (PostgreSQL alias)
983                    "INT8" => DataType::BigInt { length: None },
984                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
985                    "FLOAT8" => DataType::Custom {
986                        name: "DOUBLE PRECISION".to_string(),
987                    },
988                    // FLOAT4 -> REAL (PostgreSQL alias)
989                    "FLOAT4" => DataType::Custom {
990                        name: "REAL".to_string(),
991                    },
992                    // INT4 -> INTEGER (PostgreSQL alias)
993                    "INT4" => DataType::Int {
994                        length: None,
995                        integer_spelling: false,
996                    },
997                    // INT2 -> SMALLINT (PostgreSQL alias)
998                    "INT2" => DataType::SmallInt { length: None },
999                    _ => dt,
1000                }
1001            }
1002
1003            // Keep all other types as-is
1004            other => other,
1005        };
1006        Ok(Expression::DataType(transformed))
1007    }
1008
1009    fn transform_function(&self, f: Function) -> Result<Expression> {
1010        let name_upper = f.name.to_uppercase();
1011        match name_upper.as_str() {
1012            // IFNULL -> COALESCE
1013            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1014                original_name: None,
1015                expressions: f.args,
1016            }))),
1017
1018            // NVL -> COALESCE
1019            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1020                original_name: None,
1021                expressions: f.args,
1022            }))),
1023
1024            // ISNULL (SQL Server) -> COALESCE
1025            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
1026                original_name: None,
1027                expressions: f.args,
1028            }))),
1029
1030            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
1031            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1032                Function::new("STRING_AGG".to_string(), f.args),
1033            ))),
1034
1035            // SUBSTR -> SUBSTRING (standard SQL)
1036            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
1037                "SUBSTRING".to_string(),
1038                f.args,
1039            )))),
1040
1041            // RAND -> RANDOM in PostgreSQL
1042            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
1043
1044            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
1045            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
1046                this: f.args.into_iter().next().unwrap(),
1047                decimals: None,
1048                to: None,
1049            }))),
1050
1051            // LEN -> LENGTH in PostgreSQL
1052            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1053                this: f.args.into_iter().next().unwrap(),
1054                original_name: None,
1055            }))),
1056
1057            // CHAR_LENGTH -> LENGTH in PostgreSQL
1058            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
1059                this: f.args.into_iter().next().unwrap(),
1060                original_name: None,
1061            }))),
1062
1063            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
1064            "CHARACTER_LENGTH" if f.args.len() == 1 => {
1065                Ok(Expression::Length(Box::new(UnaryFunc {
1066                    this: f.args.into_iter().next().unwrap(),
1067                    original_name: None,
1068                })))
1069            }
1070
1071            // CHARINDEX -> POSITION in PostgreSQL
1072            // CHARINDEX(substring, string) -> POSITION(substring IN string)
1073            "CHARINDEX" if f.args.len() >= 2 => {
1074                let mut args = f.args;
1075                let substring = args.remove(0);
1076                let string = args.remove(0);
1077                Ok(Expression::Position(Box::new(
1078                    crate::expressions::PositionFunc {
1079                        substring,
1080                        string,
1081                        start: args.pop(),
1082                    },
1083                )))
1084            }
1085
1086            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
1087            "GETDATE" => Ok(Expression::CurrentTimestamp(
1088                crate::expressions::CurrentTimestamp {
1089                    precision: None,
1090                    sysdate: false,
1091                },
1092            )),
1093
1094            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
1095            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
1096                crate::expressions::CurrentTimestamp {
1097                    precision: None,
1098                    sysdate: false,
1099                },
1100            )),
1101
1102            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
1103            "NOW" => Ok(Expression::CurrentTimestamp(
1104                crate::expressions::CurrentTimestamp {
1105                    precision: None,
1106                    sysdate: false,
1107                },
1108            )),
1109
1110            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
1111            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
1112                "GEN_RANDOM_UUID".to_string(),
1113                vec![],
1114            )))),
1115
1116            // UUID -> GEN_RANDOM_UUID in PostgreSQL
1117            "UUID" => Ok(Expression::Function(Box::new(Function::new(
1118                "GEN_RANDOM_UUID".to_string(),
1119                vec![],
1120            )))),
1121
1122            // UNNEST is native to PostgreSQL
1123            "UNNEST" => Ok(Expression::Function(Box::new(f))),
1124
1125            // GENERATE_SERIES is native to PostgreSQL
1126            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
1127
1128            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
1129            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
1130                "SHA256".to_string(),
1131                f.args,
1132            )))),
1133
1134            // SHA2 -> SHA256/SHA512 based on length argument
1135            "SHA2" if f.args.len() == 2 => {
1136                // SHA2(data, length) -> SHA256/SHA384/SHA512
1137                let args = f.args;
1138                let data = args[0].clone();
1139                // Default to SHA256 - would need runtime inspection for exact mapping
1140                Ok(Expression::Function(Box::new(Function::new(
1141                    "SHA256".to_string(),
1142                    vec![data],
1143                ))))
1144            }
1145
1146            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
1147            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
1148
1149            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
1150            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(
1151                Function::new("LEVENSHTEIN_LESS_EQUAL".to_string(), f.args),
1152            ))),
1153            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
1154                Function::new("LEVENSHTEIN".to_string(), f.args),
1155            ))),
1156
1157            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1158            "TRIM" if f.args.len() == 2 => {
1159                let value = f.args[0].clone();
1160                let chars = f.args[1].clone();
1161                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1162                    this: value,
1163                    characters: Some(chars),
1164                    position: crate::expressions::TrimPosition::Both,
1165                    sql_standard_syntax: true,
1166                    position_explicit: false,
1167                })))
1168            }
1169
1170            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1171            "DATEDIFF" if f.args.len() >= 2 => {
1172                let mut args = f.args;
1173                if args.len() == 2 {
1174                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1175                    let first = args.remove(0);
1176                    let second = args.remove(0);
1177                    Ok(Expression::Function(Box::new(Function::new(
1178                        "AGE".to_string(),
1179                        vec![first, second],
1180                    ))))
1181                } else {
1182                    // 3-arg form: DATEDIFF(unit, start, end)
1183                    let unit_expr = args.remove(0);
1184                    let start = args.remove(0);
1185                    let end_expr = args.remove(0);
1186
1187                    // Extract unit name from identifier or column
1188                    let unit_name = match &unit_expr {
1189                        Expression::Identifier(id) => id.name.to_uppercase(),
1190                        Expression::Column(col) if col.table.is_none() => {
1191                            col.name.name.to_uppercase()
1192                        }
1193                        _ => "DAY".to_string(),
1194                    };
1195
1196                    // Helper: CAST(expr AS TIMESTAMP)
1197                    let cast_ts = |e: Expression| -> Expression {
1198                        Expression::Cast(Box::new(Cast {
1199                            this: e,
1200                            to: DataType::Timestamp {
1201                                precision: None,
1202                                timezone: false,
1203                            },
1204                            trailing_comments: Vec::new(),
1205                            double_colon_syntax: false,
1206                            format: None,
1207                            default: None,
1208                        }))
1209                    };
1210
1211                    // Helper: CAST(expr AS BIGINT)
1212                    let cast_bigint = |e: Expression| -> Expression {
1213                        Expression::Cast(Box::new(Cast {
1214                            this: e,
1215                            to: DataType::BigInt { length: None },
1216                            trailing_comments: Vec::new(),
1217                            double_colon_syntax: false,
1218                            format: None,
1219                            default: None,
1220                        }))
1221                    };
1222
1223                    let end_ts = cast_ts(end_expr.clone());
1224                    let start_ts = cast_ts(start.clone());
1225
1226                    // Helper: end_ts - start_ts
1227                    let ts_diff = || -> Expression {
1228                        Expression::Sub(Box::new(BinaryOp::new(
1229                            cast_ts(end_expr.clone()),
1230                            cast_ts(start.clone()),
1231                        )))
1232                    };
1233
1234                    // Helper: AGE(end_ts, start_ts)
1235                    let age_call = || -> Expression {
1236                        Expression::Function(Box::new(Function::new(
1237                            "AGE".to_string(),
1238                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1239                        )))
1240                    };
1241
1242                    // Helper: EXTRACT(field FROM expr)
1243                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1244                        Expression::Extract(Box::new(ExtractFunc { this: from, field }))
1245                    };
1246
1247                    // Helper: number literal
1248                    let num = |n: i64| -> Expression {
1249                        Expression::Literal(Literal::Number(n.to_string()))
1250                    };
1251
1252                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1253                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1254
1255                    let result = match unit_name.as_str() {
1256                        "MICROSECOND" => {
1257                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1258                            let epoch = extract(epoch_field, ts_diff());
1259                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(
1260                                epoch,
1261                                num(1000000),
1262                            ))))
1263                        }
1264                        "MILLISECOND" => {
1265                            let epoch = extract(epoch_field, ts_diff());
1266                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1267                        }
1268                        "SECOND" => {
1269                            let epoch = extract(epoch_field, ts_diff());
1270                            cast_bigint(epoch)
1271                        }
1272                        "MINUTE" => {
1273                            let epoch = extract(epoch_field, ts_diff());
1274                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1275                        }
1276                        "HOUR" => {
1277                            let epoch = extract(epoch_field, ts_diff());
1278                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1279                        }
1280                        "DAY" => {
1281                            let epoch = extract(epoch_field, ts_diff());
1282                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1283                        }
1284                        "WEEK" => {
1285                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1286                            let diff_parens = Expression::Paren(Box::new(Paren {
1287                                this: ts_diff(),
1288                                trailing_comments: Vec::new(),
1289                            }));
1290                            let days =
1291                                extract(DateTimeField::Custom("days".to_string()), diff_parens);
1292                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1293                        }
1294                        "MONTH" => {
1295                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1296                            let year_part =
1297                                extract(DateTimeField::Custom("year".to_string()), age_call());
1298                            let month_part =
1299                                extract(DateTimeField::Custom("month".to_string()), age_call());
1300                            let year_months =
1301                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1302                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1303                                year_months,
1304                                month_part,
1305                            ))))
1306                        }
1307                        "QUARTER" => {
1308                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1309                            let year_part =
1310                                extract(DateTimeField::Custom("year".to_string()), age_call());
1311                            let month_part =
1312                                extract(DateTimeField::Custom("month".to_string()), age_call());
1313                            let year_quarters =
1314                                Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1315                            let month_quarters =
1316                                Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1317                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(
1318                                year_quarters,
1319                                month_quarters,
1320                            ))))
1321                        }
1322                        "YEAR" => {
1323                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1324                            cast_bigint(extract(
1325                                DateTimeField::Custom("year".to_string()),
1326                                age_call(),
1327                            ))
1328                        }
1329                        _ => {
1330                            // Fallback: simple AGE
1331                            Expression::Function(Box::new(Function::new(
1332                                "AGE".to_string(),
1333                                vec![end_ts, start_ts],
1334                            )))
1335                        }
1336                    };
1337                    Ok(result)
1338                }
1339            }
1340
1341            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1342            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1343                let mut args = f.args;
1344                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1345                let start = args.remove(0);
1346                let end = args.remove(0);
1347                Ok(Expression::Function(Box::new(Function::new(
1348                    "AGE".to_string(),
1349                    vec![end, start],
1350                ))))
1351            }
1352
1353            // FROM_UNIXTIME -> TO_TIMESTAMP
1354            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1355                "TO_TIMESTAMP".to_string(),
1356                f.args,
1357            )))),
1358
1359            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1360            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1361                let arg = f.args.into_iter().next().unwrap();
1362                Ok(Expression::Function(Box::new(Function::new(
1363                    "DATE_PART".to_string(),
1364                    vec![Expression::string("epoch"), arg],
1365                ))))
1366            }
1367
1368            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1369            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1370                Ok(Expression::Function(Box::new(Function::new(
1371                    "DATE_PART".to_string(),
1372                    vec![
1373                        Expression::string("epoch"),
1374                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1375                            precision: None,
1376                            sysdate: false,
1377                        }),
1378                    ],
1379                ))))
1380            }
1381
1382            // DATEADD -> date + interval pattern
1383            "DATEADD" if f.args.len() == 3 => {
1384                // DATEADD(unit, count, date) -> date + interval 'count unit'
1385                // This is a simplified version - full impl would construct proper interval
1386                let mut args = f.args;
1387                let _unit = args.remove(0);
1388                let count = args.remove(0);
1389                let date = args.remove(0);
1390                Ok(Expression::Add(Box::new(BinaryOp {
1391                    left: date,
1392                    right: count,
1393                    left_comments: Vec::new(),
1394                    operator_comments: Vec::new(),
1395                    trailing_comments: Vec::new(),
1396                })))
1397            }
1398
1399            // INSTR -> POSITION (simplified)
1400            "INSTR" if f.args.len() >= 2 => {
1401                let mut args = f.args;
1402                let string = args.remove(0);
1403                let substring = args.remove(0);
1404                Ok(Expression::Position(Box::new(
1405                    crate::expressions::PositionFunc {
1406                        substring,
1407                        string,
1408                        start: args.pop(),
1409                    },
1410                )))
1411            }
1412
1413            // CONCAT_WS is native to PostgreSQL
1414            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1415
1416            // REGEXP_REPLACE: pass through without adding 'g' flag
1417            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1418            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1419                Ok(Expression::Function(Box::new(f)))
1420            }
1421            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1422            // If occurrence is 0 (global), append 'g' to flags
1423            "REGEXP_REPLACE" if f.args.len() == 6 => {
1424                let is_global = match &f.args[4] {
1425                    Expression::Literal(crate::expressions::Literal::Number(n)) => n == "0",
1426                    _ => false,
1427                };
1428                if is_global {
1429                    let subject = f.args[0].clone();
1430                    let pattern = f.args[1].clone();
1431                    let replacement = f.args[2].clone();
1432                    let position = f.args[3].clone();
1433                    let occurrence = f.args[4].clone();
1434                    let params = &f.args[5];
1435                    let mut flags =
1436                        if let Expression::Literal(crate::expressions::Literal::String(s)) = params
1437                        {
1438                            s.clone()
1439                        } else {
1440                            String::new()
1441                        };
1442                    if !flags.contains('g') {
1443                        flags.push('g');
1444                    }
1445                    Ok(Expression::Function(Box::new(Function::new(
1446                        "REGEXP_REPLACE".to_string(),
1447                        vec![
1448                            subject,
1449                            pattern,
1450                            replacement,
1451                            position,
1452                            occurrence,
1453                            Expression::Literal(crate::expressions::Literal::String(flags)),
1454                        ],
1455                    ))))
1456                } else {
1457                    Ok(Expression::Function(Box::new(f)))
1458                }
1459            }
1460            // Default: pass through
1461            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1462
1463            // Pass through everything else
1464            _ => Ok(Expression::Function(Box::new(f))),
1465        }
1466    }
1467
1468    fn transform_aggregate_function(
1469        &self,
1470        f: Box<crate::expressions::AggregateFunction>,
1471    ) -> Result<Expression> {
1472        let name_upper = f.name.to_uppercase();
1473        match name_upper.as_str() {
1474            // COUNT_IF -> SUM(CASE WHEN...)
1475            "COUNT_IF" if !f.args.is_empty() => {
1476                let condition = f.args.into_iter().next().unwrap();
1477                let case_expr = Expression::Case(Box::new(Case {
1478                    operand: None,
1479                    whens: vec![(condition, Expression::number(1))],
1480                    else_: Some(Expression::number(0)),
1481                    comments: Vec::new(),
1482                }));
1483                Ok(Expression::Sum(Box::new(AggFunc {
1484                    ignore_nulls: None,
1485                    having_max: None,
1486                    this: case_expr,
1487                    distinct: f.distinct,
1488                    filter: f.filter,
1489                    order_by: Vec::new(),
1490                    name: None,
1491                    limit: None,
1492                })))
1493            }
1494
1495            // GROUP_CONCAT -> STRING_AGG
1496            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1497                Function::new("STRING_AGG".to_string(), f.args),
1498            ))),
1499
1500            // STDEV -> STDDEV in PostgreSQL
1501            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc {
1502                ignore_nulls: None,
1503                having_max: None,
1504                this: f.args.into_iter().next().unwrap(),
1505                distinct: f.distinct,
1506                filter: f.filter,
1507                order_by: Vec::new(),
1508                name: None,
1509                limit: None,
1510            }))),
1511
1512            // STDEVP -> STDDEV_POP in PostgreSQL
1513            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc {
1514                ignore_nulls: None,
1515                having_max: None,
1516                this: f.args.into_iter().next().unwrap(),
1517                distinct: f.distinct,
1518                filter: f.filter,
1519                order_by: Vec::new(),
1520                name: None,
1521                limit: None,
1522            }))),
1523
1524            // VAR -> VAR_SAMP in PostgreSQL
1525            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1526                ignore_nulls: None,
1527                having_max: None,
1528                this: f.args.into_iter().next().unwrap(),
1529                distinct: f.distinct,
1530                filter: f.filter,
1531                order_by: Vec::new(),
1532                name: None,
1533                limit: None,
1534            }))),
1535
1536            // VARP -> VAR_POP in PostgreSQL
1537            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc {
1538                ignore_nulls: None,
1539                having_max: None,
1540                this: f.args.into_iter().next().unwrap(),
1541                distinct: f.distinct,
1542                filter: f.filter,
1543                order_by: Vec::new(),
1544                name: None,
1545                limit: None,
1546            }))),
1547
1548            // BIT_AND is native to PostgreSQL
1549            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1550
1551            // BIT_OR is native to PostgreSQL
1552            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1553
1554            // BIT_XOR is native to PostgreSQL
1555            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1556
1557            // BOOL_AND is native to PostgreSQL
1558            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1559
1560            // BOOL_OR is native to PostgreSQL
1561            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1562
1563            // VARIANCE -> VAR_SAMP in PostgreSQL
1564            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc {
1565                ignore_nulls: None,
1566                having_max: None,
1567                this: f.args.into_iter().next().unwrap(),
1568                distinct: f.distinct,
1569                filter: f.filter,
1570                order_by: Vec::new(),
1571                name: None,
1572                limit: None,
1573            }))),
1574
1575            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1576            "LOGICAL_OR" if !f.args.is_empty() => {
1577                let mut new_agg = f.clone();
1578                new_agg.name = "BOOL_OR".to_string();
1579                Ok(Expression::AggregateFunction(new_agg))
1580            }
1581
1582            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1583            "LOGICAL_AND" if !f.args.is_empty() => {
1584                let mut new_agg = f.clone();
1585                new_agg.name = "BOOL_AND".to_string();
1586                Ok(Expression::AggregateFunction(new_agg))
1587            }
1588
1589            // Pass through everything else
1590            _ => Ok(Expression::AggregateFunction(f)),
1591        }
1592    }
1593}
1594
1595#[cfg(test)]
1596mod tests {
1597    use super::*;
1598    use crate::dialects::Dialect;
1599
1600    fn transpile_to_postgres(sql: &str) -> String {
1601        let dialect = Dialect::get(DialectType::Generic);
1602        let result = dialect
1603            .transpile_to(sql, DialectType::PostgreSQL)
1604            .expect("Transpile failed");
1605        result[0].clone()
1606    }
1607
1608    #[test]
1609    fn test_ifnull_to_coalesce() {
1610        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1611        assert!(
1612            result.contains("COALESCE"),
1613            "Expected COALESCE, got: {}",
1614            result
1615        );
1616    }
1617
1618    #[test]
1619    fn test_nvl_to_coalesce() {
1620        let result = transpile_to_postgres("SELECT NVL(a, b)");
1621        assert!(
1622            result.contains("COALESCE"),
1623            "Expected COALESCE, got: {}",
1624            result
1625        );
1626    }
1627
1628    #[test]
1629    fn test_rand_to_random() {
1630        let result = transpile_to_postgres("SELECT RAND()");
1631        assert!(
1632            result.contains("RANDOM"),
1633            "Expected RANDOM, got: {}",
1634            result
1635        );
1636    }
1637
1638    #[test]
1639    fn test_basic_select() {
1640        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1641        assert!(result.contains("SELECT"));
1642        assert!(result.contains("FROM users"));
1643    }
1644
1645    #[test]
1646    fn test_len_to_length() {
1647        let result = transpile_to_postgres("SELECT LEN(name)");
1648        assert!(
1649            result.contains("LENGTH"),
1650            "Expected LENGTH, got: {}",
1651            result
1652        );
1653    }
1654
1655    #[test]
1656    fn test_getdate_to_current_timestamp() {
1657        let result = transpile_to_postgres("SELECT GETDATE()");
1658        assert!(
1659            result.contains("CURRENT_TIMESTAMP"),
1660            "Expected CURRENT_TIMESTAMP, got: {}",
1661            result
1662        );
1663    }
1664
1665    #[test]
1666    fn test_substr_to_substring() {
1667        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1668        assert!(
1669            result.contains("SUBSTRING"),
1670            "Expected SUBSTRING, got: {}",
1671            result
1672        );
1673    }
1674
1675    #[test]
1676    fn test_group_concat_to_string_agg() {
1677        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1678        assert!(
1679            result.contains("STRING_AGG"),
1680            "Expected STRING_AGG, got: {}",
1681            result
1682        );
1683    }
1684
1685    #[test]
1686    fn test_double_quote_identifiers() {
1687        // PostgreSQL uses double quotes for identifiers
1688        let dialect = PostgresDialect;
1689        let config = dialect.generator_config();
1690        assert_eq!(config.identifier_quote, '"');
1691    }
1692
1693    #[test]
1694    fn test_char_length_to_length() {
1695        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1696        assert!(
1697            result.contains("LENGTH"),
1698            "Expected LENGTH, got: {}",
1699            result
1700        );
1701    }
1702
1703    #[test]
1704    fn test_character_length_to_length() {
1705        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1706        assert!(
1707            result.contains("LENGTH"),
1708            "Expected LENGTH, got: {}",
1709            result
1710        );
1711    }
1712
1713    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1714    fn identity_postgres(sql: &str) -> String {
1715        let dialect = Dialect::get(DialectType::PostgreSQL);
1716        let exprs = dialect.parse(sql).expect("Parse failed");
1717        let transformed = dialect
1718            .transform(exprs[0].clone())
1719            .expect("Transform failed");
1720        dialect.generate(&transformed).expect("Generate failed")
1721    }
1722
1723    #[test]
1724    fn test_json_extract_with_column_path() {
1725        // When the path is a column reference (not a literal), should use function form
1726        let result = identity_postgres("json_data.data -> field_ids.field_id");
1727        assert!(
1728            result.contains("JSON_EXTRACT_PATH"),
1729            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1730            result
1731        );
1732    }
1733
1734    #[test]
1735    fn test_json_extract_scalar_with_negative_index() {
1736        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1737        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1738        assert!(
1739            result.contains("JSON_EXTRACT_PATH_TEXT"),
1740            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1741            result
1742        );
1743        // The first -> should still be arrow syntax since 'duration' is a string literal
1744        assert!(
1745            result.contains("->"),
1746            "Expected -> for string literal path, got: {}",
1747            result
1748        );
1749    }
1750
1751    #[test]
1752    fn test_json_extract_with_string_literal() {
1753        // When the path is a string literal, should keep arrow syntax
1754        let result = identity_postgres("data -> 'key'");
1755        assert!(
1756            result.contains("->"),
1757            "Expected -> for string literal path, got: {}",
1758            result
1759        );
1760        assert!(
1761            !result.contains("JSON_EXTRACT_PATH"),
1762            "Should NOT use function form for string literal, got: {}",
1763            result
1764        );
1765    }
1766}