Skip to main content

polyglot_sql/dialects/
postgres.rs

1//! PostgreSQL Dialect
2//!
3//! PostgreSQL-specific transformations based on sqlglot patterns.
4//! Comprehensive batch translation from Python sqlglot postgres.py
5//!
6//! Key differences from other dialects:
7//! - TRY_CAST not supported (falls back to CAST)
8//! - RANDOM() instead of RAND()
9//! - STRING_AGG instead of GROUP_CONCAT
10//! - Bitwise XOR is # operator
11//! - BOOL_AND/BOOL_OR for logical aggregates
12//! - GEN_RANDOM_UUID() for UUID generation
13//! - UNNEST instead of EXPLODE
14//! - Type mappings: TINYINT→SMALLINT, FLOAT→REAL, DOUBLE→DOUBLE PRECISION, etc.
15//! - RegexpLike uses ~ operator, RegexpILike uses ~* operator
16//! - JSONB operators: #>, #>>, ?, ?|, ?&
17
18use super::{DialectImpl, DialectType};
19use crate::error::Result;
20use crate::expressions::{
21    AggFunc, BinaryOp, BooleanLiteral, Case, Cast, CeilFunc, DataType, DateTimeField,
22    Expression, ExtractFunc, Function, Interval, IntervalUnit, IntervalUnitSpec,
23    Join, JoinKind, Literal, Paren, UnaryFunc, VarArgFunc,
24};
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Helper to wrap JSON arrow expressions in parentheses when they appear
29/// in contexts that require it (Binary, In, Not expressions)
30/// This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior
31fn wrap_if_json_arrow(expr: Expression) -> Expression {
32    match &expr {
33        Expression::JsonExtract(f) if f.arrow_syntax => {
34            Expression::Paren(Box::new(Paren {
35                this: expr,
36                trailing_comments: Vec::new(),
37            }))
38        }
39        Expression::JsonExtractScalar(f) if f.arrow_syntax => {
40            Expression::Paren(Box::new(Paren {
41                this: expr,
42                trailing_comments: Vec::new(),
43            }))
44        }
45        _ => expr,
46    }
47}
48
49/// PostgreSQL dialect
50pub struct PostgresDialect;
51
52impl DialectImpl for PostgresDialect {
53    fn dialect_type(&self) -> DialectType {
54        DialectType::PostgreSQL
55    }
56
57    fn tokenizer_config(&self) -> TokenizerConfig {
58        use crate::tokens::TokenType;
59        let mut config = TokenizerConfig::default();
60        // PostgreSQL supports $$ string literals (heredoc strings)
61        config.quotes.insert("$$".to_string(), "$$".to_string());
62        // PostgreSQL uses double quotes for identifiers
63        config.identifiers.insert('"', '"');
64        // Nested comments supported
65        config.nested_comments = true;
66        // PostgreSQL treats EXEC as a generic command (not TSQL EXEC statement)
67        // Note: EXECUTE is kept as-is since it's used in GRANT/REVOKE EXECUTE ON FUNCTION
68        config.keywords.insert("EXEC".to_string(), TokenType::Command);
69        config
70    }
71
72    fn generator_config(&self) -> GeneratorConfig {
73        use crate::generator::IdentifierQuoteStyle;
74        GeneratorConfig {
75            identifier_quote: '"',
76            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
77            dialect: Some(DialectType::PostgreSQL),
78            // PostgreSQL uses TIMESTAMPTZ shorthand
79            tz_to_with_time_zone: false,
80            // PostgreSQL prefers INTERVAL '1 day' syntax
81            single_string_interval: true,
82            // TABLESAMPLE uses REPEATABLE in PostgreSQL
83            tablesample_seed_keyword: "REPEATABLE",
84            // PostgreSQL doesn't support NVL2
85            nvl2_supported: false,
86            // PostgreSQL uses $ for parameters
87            parameter_token: "$",
88            // PostgreSQL uses % for named placeholders
89            named_placeholder_token: "%",
90            // PostgreSQL supports SELECT INTO
91            supports_select_into: true,
92            // PostgreSQL: USING btree(col) without space before parens
93            index_using_no_space: true,
94            // PostgreSQL supports UNLOGGED tables
95            supports_unlogged_tables: true,
96            // PostgreSQL doesn't support multi-arg DISTINCT
97            multi_arg_distinct: false,
98            // PostgreSQL uses ANY(subquery) without space
99            quantified_no_paren_space: true,
100            // PostgreSQL supports window EXCLUDE clause
101            supports_window_exclude: true,
102            // PostgreSQL normalizes single-bound window frames to BETWEEN form
103            normalize_window_frame_between: true,
104            // PostgreSQL COPY doesn't use INTO keyword
105            copy_has_into_keyword: false,
106            // PostgreSQL ARRAY_SIZE requires dimension argument
107            array_size_dim_required: Some(true),
108            // PostgreSQL supports BETWEEN flags
109            supports_between_flags: true,
110            // PostgreSQL doesn't support hints
111            join_hints: false,
112            table_hints: false,
113            query_hints: false,
114            // PostgreSQL supports locking reads
115            locking_reads_supported: true,
116            // PostgreSQL doesn't rename tables with DB
117            rename_table_with_db: false,
118            // PostgreSQL can implement array any
119            can_implement_array_any: true,
120            // PostgreSQL ARRAY_CONCAT is not var-len
121            array_concat_is_var_len: false,
122            // PostgreSQL doesn't support MEDIAN
123            supports_median: false,
124            // PostgreSQL requires JSON type for extraction
125            json_type_required_for_extraction: true,
126            // PostgreSQL LIKE property inside schema
127            like_property_inside_schema: true,
128            ..Default::default()
129        }
130    }
131
132    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
133        match expr {
134            // ============================================
135            // DATA TYPE MAPPINGS (from TYPE_MAPPING)
136            // These are handled specially - transform DataType variants
137            // ============================================
138            Expression::DataType(dt) => self.transform_data_type(dt),
139
140            // ============================================
141            // NULL HANDLING
142            // ============================================
143            // IFNULL -> COALESCE in PostgreSQL
144            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
145                expressions: vec![f.this, f.expression],
146            }))),
147
148            // NVL -> COALESCE in PostgreSQL
149            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
150                expressions: vec![f.this, f.expression],
151            }))),
152
153            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
154            // so it outputs as COALESCE instead of the original function name
155            Expression::Coalesce(mut f) => {
156                f.original_name = None;
157                Ok(Expression::Coalesce(f))
158            }
159
160            // ============================================
161            // CAST OPERATIONS
162            // ============================================
163            // TryCast -> CAST (PostgreSQL doesn't support TRY_CAST)
164            Expression::TryCast(c) => Ok(Expression::Cast(c)),
165
166            // SafeCast -> CAST (PostgreSQL doesn't support safe casts)
167            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
168
169            // ============================================
170            // RANDOM
171            // ============================================
172            // RAND -> RANDOM in PostgreSQL
173            Expression::Rand(r) => {
174                // PostgreSQL's RANDOM() doesn't take a seed argument
175                let _ = r.seed; // Ignore seed
176                Ok(Expression::Random(crate::expressions::Random))
177            }
178
179            // ============================================
180            // UUID
181            // ============================================
182            // Uuid -> GEN_RANDOM_UUID in PostgreSQL
183            Expression::Uuid(_) => Ok(Expression::Function(Box::new(Function::new(
184                "GEN_RANDOM_UUID".to_string(),
185                vec![],
186            )))),
187
188            // ============================================
189            // ARRAY OPERATIONS
190            // ============================================
191            // EXPLODE -> UNNEST in PostgreSQL
192            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
193                crate::expressions::UnnestFunc {
194                    this: f.this,
195                    expressions: Vec::new(),
196                    with_ordinality: false,
197                    alias: None,
198                    offset_alias: None,
199                },
200            ))),
201
202            // ExplodeOuter -> UNNEST in PostgreSQL
203            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
204                crate::expressions::UnnestFunc {
205                    this: f.this,
206                    expressions: Vec::new(),
207                    with_ordinality: false,
208                    alias: None,
209                    offset_alias: None,
210                },
211            ))),
212
213            // ArrayConcat -> ARRAY_CAT in PostgreSQL
214            Expression::ArrayConcat(f) => Ok(Expression::Function(Box::new(Function::new(
215                "ARRAY_CAT".to_string(),
216                f.expressions,
217            )))),
218
219            // ArrayPrepend -> ARRAY_PREPEND in PostgreSQL (note: args swapped from other dialects)
220            Expression::ArrayPrepend(f) => Ok(Expression::Function(Box::new(Function::new(
221                "ARRAY_PREPEND".to_string(),
222                vec![f.expression, f.this], // PostgreSQL: ARRAY_PREPEND(element, array)
223            )))),
224
225            // ============================================
226            // BITWISE OPERATIONS
227            // ============================================
228            // BitwiseXor -> # operator in PostgreSQL
229            Expression::BitwiseXor(f) => {
230                // Use a special marker that generator will recognize
231                Ok(Expression::Function(Box::new(Function::new(
232                    "__PG_BITWISE_XOR__".to_string(),
233                    vec![f.left, f.right],
234                ))))
235            }
236
237            // BitwiseAndAgg -> BIT_AND
238            Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(Function::new(
239                "BIT_AND".to_string(),
240                vec![f.this],
241            )))),
242
243            // BitwiseOrAgg -> BIT_OR
244            Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(Function::new(
245                "BIT_OR".to_string(),
246                vec![f.this],
247            )))),
248
249            // BitwiseXorAgg -> BIT_XOR
250            Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(Function::new(
251                "BIT_XOR".to_string(),
252                vec![f.this],
253            )))),
254
255            // ============================================
256            // BOOLEAN AGGREGATES
257            // ============================================
258            // LogicalAnd -> BOOL_AND
259            Expression::LogicalAnd(f) => Ok(Expression::Function(Box::new(Function::new(
260                "BOOL_AND".to_string(),
261                vec![f.this],
262            )))),
263
264            // LogicalOr -> BOOL_OR
265            Expression::LogicalOr(f) => Ok(Expression::Function(Box::new(Function::new(
266                "BOOL_OR".to_string(),
267                vec![f.this],
268            )))),
269
270            // Xor -> PostgreSQL bool_xor pattern: a <> b for boolean values
271            Expression::Xor(f) => {
272                if let (Some(a), Some(b)) = (f.this, f.expression) {
273                    Ok(Expression::Neq(Box::new(BinaryOp {
274                        left: *a,
275                        right: *b,
276                        left_comments: Vec::new(),
277                        operator_comments: Vec::new(),
278                        trailing_comments: Vec::new(),
279                    })))
280                } else {
281                    Ok(Expression::Boolean(BooleanLiteral { value: false }))
282                }
283            }
284
285            // ============================================
286            // ARRAY OPERATORS
287            // ============================================
288            // ArrayContainedBy (<@) -> ArrayContainsAll (@>) with swapped operands
289            // a <@ b -> b @> a (PostgreSQL prefers @> syntax)
290            Expression::ArrayContainedBy(op) => Ok(Expression::ArrayContainsAll(Box::new(BinaryOp {
291                left: op.right,
292                right: op.left,
293                left_comments: Vec::new(),
294                operator_comments: Vec::new(),
295                trailing_comments: Vec::new(),
296            }))),
297
298            // ============================================
299            // REGEXP OPERATIONS (PostgreSQL uses ~ and ~* operators)
300            // ============================================
301            // RegexpLike -> keep as-is, generator handles ~ operator output
302            Expression::RegexpLike(f) => {
303                // Generator will output as: expr ~ pattern
304                Ok(Expression::RegexpLike(f))
305            }
306
307            // ============================================
308            // DATE/TIME FUNCTIONS
309            // ============================================
310            // DateAdd -> date + INTERVAL in PostgreSQL
311            Expression::DateAdd(f) => {
312                let interval_expr = Expression::Interval(Box::new(Interval {
313                    this: Some(f.interval),
314                    unit: Some(IntervalUnitSpec::Simple {
315                        unit: f.unit,
316                        use_plural: false,
317                    }),
318                }));
319                Ok(Expression::Add(Box::new(BinaryOp {
320                    left: f.this,
321                    right: interval_expr,
322                    left_comments: Vec::new(),
323                    operator_comments: Vec::new(),
324                    trailing_comments: Vec::new(),
325                })))
326            }
327
328            // DateSub -> date - INTERVAL in PostgreSQL
329            Expression::DateSub(f) => {
330                let interval_expr = Expression::Interval(Box::new(Interval {
331                    this: Some(f.interval),
332                    unit: Some(IntervalUnitSpec::Simple {
333                        unit: f.unit,
334                        use_plural: false,
335                    }),
336                }));
337                Ok(Expression::Sub(Box::new(BinaryOp {
338                    left: f.this,
339                    right: interval_expr,
340                    left_comments: Vec::new(),
341                    operator_comments: Vec::new(),
342                    trailing_comments: Vec::new(),
343                })))
344            }
345
346            // DateDiff -> Complex PostgreSQL pattern using AGE/EXTRACT
347            Expression::DateDiff(f) => {
348                // For PostgreSQL, DATEDIFF is converted to EXTRACT(epoch FROM ...) pattern
349                // Simplified: use AGE function
350                Ok(Expression::Function(Box::new(Function::new(
351                    "AGE".to_string(),
352                    vec![f.this, f.expression],
353                ))))
354            }
355
356            // UnixToTime -> TO_TIMESTAMP
357            Expression::UnixToTime(f) => Ok(Expression::Function(Box::new(Function::new(
358                "TO_TIMESTAMP".to_string(),
359                vec![*f.this],
360            )))),
361
362            // TimeToUnix -> DATE_PART('epoch', ...) in PostgreSQL
363            Expression::TimeToUnix(f) => Ok(Expression::Function(Box::new(Function::new(
364                "DATE_PART".to_string(),
365                vec![Expression::string("epoch"), f.this],
366            )))),
367
368            // StrToTime -> TO_TIMESTAMP in PostgreSQL
369            Expression::ToTimestamp(f) => {
370                let mut args = vec![f.this];
371                if let Some(fmt) = f.format {
372                    args.push(fmt);
373                }
374                Ok(Expression::Function(Box::new(Function::new(
375                    "TO_TIMESTAMP".to_string(),
376                    args,
377                ))))
378            }
379
380            // StrToDate -> TO_DATE in PostgreSQL
381            Expression::ToDate(f) => {
382                let mut args = vec![f.this];
383                if let Some(fmt) = f.format {
384                    args.push(fmt);
385                }
386                Ok(Expression::Function(Box::new(Function::new(
387                    "TO_DATE".to_string(),
388                    args,
389                ))))
390            }
391
392            // TimestampTrunc -> DATE_TRUNC
393            Expression::TimestampTrunc(f) => {
394                // Convert DateTimeField to string expression for DATE_TRUNC
395                let unit_str = format!("{:?}", f.unit).to_lowercase();
396                let args = vec![Expression::string(&unit_str), f.this];
397                Ok(Expression::Function(Box::new(Function::new(
398                    "DATE_TRUNC".to_string(),
399                    args,
400                ))))
401            }
402
403            // TimeFromParts -> MAKE_TIME
404            Expression::TimeFromParts(f) => {
405                let mut args = Vec::new();
406                if let Some(h) = f.hour {
407                    args.push(*h);
408                }
409                if let Some(m) = f.min {
410                    args.push(*m);
411                }
412                if let Some(s) = f.sec {
413                    args.push(*s);
414                }
415                Ok(Expression::Function(Box::new(Function::new(
416                    "MAKE_TIME".to_string(),
417                    args,
418                ))))
419            }
420
421            // TimestampFromParts -> MAKE_TIMESTAMP
422            Expression::MakeTimestamp(f) => {
423                // MakeTimestampFunc has direct Expression fields, not Options
424                let args = vec![f.year, f.month, f.day, f.hour, f.minute, f.second];
425                Ok(Expression::Function(Box::new(Function::new(
426                    "MAKE_TIMESTAMP".to_string(),
427                    args,
428                ))))
429            }
430
431            // ============================================
432            // STRING FUNCTIONS
433            // ============================================
434            // StringAgg is native to PostgreSQL - keep as-is
435            Expression::StringAgg(f) => Ok(Expression::StringAgg(f)),
436
437            // GroupConcat -> STRING_AGG in PostgreSQL
438            Expression::GroupConcat(f) => {
439                let mut args = vec![f.this.clone()];
440                if let Some(sep) = f.separator.clone() {
441                    args.push(sep);
442                } else {
443                    args.push(Expression::string(","));
444                }
445                Ok(Expression::Function(Box::new(Function::new(
446                    "STRING_AGG".to_string(),
447                    args,
448                ))))
449            }
450
451            // StrPosition -> POSITION function
452            Expression::Position(f) => {
453                // PostgreSQL: POSITION(substring IN string)
454                // Keep as Position, generator handles it
455                Ok(Expression::Position(f))
456            }
457
458            // ============================================
459            // AGGREGATE FUNCTIONS
460            // ============================================
461            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END) in PostgreSQL
462            Expression::CountIf(f) => {
463                let case_expr = Expression::Case(Box::new(Case {
464                    operand: None,
465                    whens: vec![(f.this.clone(), Expression::number(1))],
466                    else_: Some(Expression::number(0)),
467                }));
468                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
469                    this: case_expr,
470                    distinct: f.distinct,
471                    filter: f.filter,
472                    order_by: Vec::new(),
473                    name: None,
474                    limit: None,
475                })))
476            }
477
478            // AnyValue -> keep as ANY_VALUE for PostgreSQL (supported since PG 16)
479            Expression::AnyValue(f) => Ok(Expression::AnyValue(f)),
480
481            // Variance -> VAR_SAMP in PostgreSQL
482            Expression::Variance(f) => Ok(Expression::Function(Box::new(Function::new(
483                "VAR_SAMP".to_string(),
484                vec![f.this],
485            )))),
486
487            // VarPop -> VAR_POP in PostgreSQL
488            Expression::VarPop(f) => Ok(Expression::Function(Box::new(Function::new(
489                "VAR_POP".to_string(),
490                vec![f.this],
491            )))),
492
493            // ============================================
494            // JSON FUNCTIONS
495            // ============================================
496            // JSONExtract -> use arrow syntax (->) in PostgreSQL for simple literal paths
497            // Complex paths (like column references) should use JSON_EXTRACT_PATH function
498            Expression::JsonExtract(mut f) => {
499                // Only use arrow syntax for simple literal paths (string or non-negative number)
500                // Complex expressions like column references should use function form
501                f.arrow_syntax = Self::is_simple_json_path(&f.path);
502                Ok(Expression::JsonExtract(f))
503            }
504
505            // JSONExtractScalar -> use arrow syntax (->>) in PostgreSQL for simple paths
506            // Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
507            // #>> (hash_arrow_syntax) stays as #>>
508            Expression::JsonExtractScalar(mut f) => {
509                if !f.hash_arrow_syntax {
510                    // Only use arrow syntax for simple literal paths (string or non-negative number)
511                    // Complex expressions like Neg(-1) should use function form
512                    f.arrow_syntax = Self::is_simple_json_path(&f.path);
513                }
514                Ok(Expression::JsonExtractScalar(f))
515            }
516
517            // ParseJson: handled by generator (outputs CAST(x AS JSON) for PostgreSQL)
518
519            // JSONObjectAgg -> JSON_OBJECT_AGG
520            Expression::JsonObjectAgg(f) => {
521                // JsonObjectAggFunc has key and value as Expression, not Option
522                let args = vec![f.key, f.value];
523                Ok(Expression::Function(Box::new(Function::new(
524                    "JSON_OBJECT_AGG".to_string(),
525                    args,
526                ))))
527            }
528
529            // JSONArrayAgg -> JSON_AGG
530            Expression::JsonArrayAgg(f) => Ok(Expression::Function(Box::new(Function::new(
531                "JSON_AGG".to_string(),
532                vec![f.this],
533            )))),
534
535            // JSONPathRoot -> empty string ($ is implicit in PostgreSQL)
536            Expression::JSONPathRoot(_) => {
537                Ok(Expression::Literal(Literal::String(String::new())))
538            }
539
540            // ============================================
541            // MISC FUNCTIONS
542            // ============================================
543            // IntDiv -> DIV in PostgreSQL
544            Expression::IntDiv(f) => Ok(Expression::Function(Box::new(Function::new(
545                "DIV".to_string(),
546                vec![f.this, f.expression],
547            )))),
548
549            // Unicode -> ASCII in PostgreSQL
550            Expression::Unicode(f) => Ok(Expression::Function(Box::new(Function::new(
551                "ASCII".to_string(),
552                vec![f.this],
553            )))),
554
555            // LastDay -> Complex expression (PostgreSQL doesn't have LAST_DAY)
556            Expression::LastDay(f) => {
557                // (DATE_TRUNC('month', date) + INTERVAL '1 month' - INTERVAL '1 day')::DATE
558                let truncated = Expression::Function(Box::new(Function::new(
559                    "DATE_TRUNC".to_string(),
560                    vec![Expression::string("month"), f.this.clone()],
561                )));
562                let plus_month = Expression::Add(Box::new(BinaryOp {
563                    left: truncated,
564                    right: Expression::Interval(Box::new(Interval {
565                        this: Some(Expression::string("1")),
566                        unit: Some(IntervalUnitSpec::Simple {
567                            unit: IntervalUnit::Month,
568                            use_plural: false,
569                        }),
570                    })),
571                    left_comments: Vec::new(),
572                    operator_comments: Vec::new(),
573                    trailing_comments: Vec::new(),
574                }));
575                let minus_day = Expression::Sub(Box::new(BinaryOp {
576                    left: plus_month,
577                    right: Expression::Interval(Box::new(Interval {
578                        this: Some(Expression::string("1")),
579                        unit: Some(IntervalUnitSpec::Simple {
580                            unit: IntervalUnit::Day,
581                            use_plural: false,
582                        }),
583                    })),
584                    left_comments: Vec::new(),
585                    operator_comments: Vec::new(),
586                    trailing_comments: Vec::new(),
587                }));
588                Ok(Expression::Cast(Box::new(Cast {
589                    this: minus_day,
590                    to: DataType::Date,
591                    trailing_comments: Vec::new(),
592                    double_colon_syntax: true, // Use PostgreSQL :: syntax
593                    format: None,
594                    default: None,
595                })))
596            }
597
598            // GenerateSeries is native to PostgreSQL
599            Expression::GenerateSeries(f) => Ok(Expression::GenerateSeries(f)),
600
601            // ExplodingGenerateSeries -> GENERATE_SERIES
602            Expression::ExplodingGenerateSeries(f) => {
603                let mut args = vec![f.start, f.stop];
604                if let Some(step) = f.step {
605                    args.push(step); // step is Expression, not Box<Expression>
606                }
607                Ok(Expression::Function(Box::new(Function::new(
608                    "GENERATE_SERIES".to_string(),
609                    args,
610                ))))
611            }
612
613            // ============================================
614            // SESSION/TIME FUNCTIONS (no parentheses in PostgreSQL)
615            // ============================================
616            // CurrentTimestamp -> CURRENT_TIMESTAMP (no parens)
617            Expression::CurrentTimestamp(_) => Ok(Expression::Function(Box::new(Function {
618                name: "CURRENT_TIMESTAMP".to_string(),
619                args: vec![],
620                distinct: false,
621                trailing_comments: vec![],
622                use_bracket_syntax: false,
623                no_parens: true,
624                quoted: false,
625            }))),
626
627            // CurrentUser -> CURRENT_USER (no parens)
628            Expression::CurrentUser(_) => Ok(Expression::Function(Box::new(Function::new(
629                "CURRENT_USER".to_string(),
630                vec![],
631            )))),
632
633            // CurrentDate -> CURRENT_DATE (no parens)
634            Expression::CurrentDate(_) => Ok(Expression::Function(Box::new(Function {
635                name: "CURRENT_DATE".to_string(),
636                args: vec![],
637                distinct: false,
638                trailing_comments: vec![],
639                use_bracket_syntax: false,
640                no_parens: true,
641                quoted: false,
642            }))),
643
644            // ============================================
645            // JOIN TRANSFORMATIONS
646            // ============================================
647            // CROSS APPLY -> CROSS JOIN LATERAL in PostgreSQL
648            Expression::Join(join) if join.kind == JoinKind::CrossApply => {
649                Ok(Expression::Join(Box::new(Join {
650                    this: join.this,
651                    on: None,
652                    using: join.using,
653                    kind: JoinKind::Lateral,
654                    use_inner_keyword: false,
655                    use_outer_keyword: false,
656                    deferred_condition: false,
657                    join_hint: None,
658                    match_condition: None,
659                    pivots: join.pivots,
660                })))
661            }
662
663            // OUTER APPLY -> LEFT JOIN LATERAL ... ON TRUE in PostgreSQL
664            Expression::Join(join) if join.kind == JoinKind::OuterApply => {
665                Ok(Expression::Join(Box::new(Join {
666                    this: join.this,
667                    on: Some(Expression::Boolean(BooleanLiteral { value: true })),
668                    using: join.using,
669                    kind: JoinKind::LeftLateral,
670                    use_inner_keyword: false,
671                    use_outer_keyword: false,
672                    deferred_condition: false,
673                    join_hint: None,
674                    match_condition: None,
675                    pivots: join.pivots,
676                })))
677            }
678
679            // ============================================
680            // GENERIC FUNCTION TRANSFORMATIONS
681            // ============================================
682            Expression::Function(f) => self.transform_function(*f),
683
684            // Generic aggregate function transformations
685            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
686
687            // ===== Context-aware JSON arrow wrapping =====
688            // When JSON arrow expressions appear in Binary/In/Not contexts,
689            // they need to be wrapped in parentheses for correct precedence.
690            // This matches Python sqlglot's WRAPPED_JSON_EXTRACT_EXPRESSIONS behavior.
691
692            // Binary operators that need JSON wrapping
693            Expression::Eq(op) => Ok(Expression::Eq(Box::new(BinaryOp {
694                left: wrap_if_json_arrow(op.left),
695                right: wrap_if_json_arrow(op.right),
696                ..*op
697            }))),
698            Expression::Neq(op) => Ok(Expression::Neq(Box::new(BinaryOp {
699                left: wrap_if_json_arrow(op.left),
700                right: wrap_if_json_arrow(op.right),
701                ..*op
702            }))),
703            Expression::Lt(op) => Ok(Expression::Lt(Box::new(BinaryOp {
704                left: wrap_if_json_arrow(op.left),
705                right: wrap_if_json_arrow(op.right),
706                ..*op
707            }))),
708            Expression::Lte(op) => Ok(Expression::Lte(Box::new(BinaryOp {
709                left: wrap_if_json_arrow(op.left),
710                right: wrap_if_json_arrow(op.right),
711                ..*op
712            }))),
713            Expression::Gt(op) => Ok(Expression::Gt(Box::new(BinaryOp {
714                left: wrap_if_json_arrow(op.left),
715                right: wrap_if_json_arrow(op.right),
716                ..*op
717            }))),
718            Expression::Gte(op) => Ok(Expression::Gte(Box::new(BinaryOp {
719                left: wrap_if_json_arrow(op.left),
720                right: wrap_if_json_arrow(op.right),
721                ..*op
722            }))),
723
724            // In expression - wrap the this part if it's JSON arrow
725            Expression::In(mut i) => {
726                i.this = wrap_if_json_arrow(i.this);
727                Ok(Expression::In(i))
728            }
729
730            // Not expression - wrap the this part if it's JSON arrow
731            Expression::Not(mut n) => {
732                n.this = wrap_if_json_arrow(n.this);
733                Ok(Expression::Not(n))
734            }
735
736            // MERGE: qualifier stripping is handled by the generator (dialect-aware)
737            // PostgreSQL generator strips qualifiers, Snowflake generator keeps them
738            Expression::Merge(m) => Ok(Expression::Merge(m)),
739
740            // JSONExtract with variant_extract (Databricks colon syntax) -> JSON_EXTRACT_PATH
741            Expression::JSONExtract(je) if je.variant_extract.is_some() => {
742                // Convert path from bracketed format to simple key
743                // e.g., '["fr''uit"]' -> 'fr''uit'
744                let path = match *je.expression {
745                    Expression::Literal(Literal::String(s)) => {
746                        // Strip bracketed JSON path format: ["key"] -> key
747                        let cleaned = if s.starts_with("[\"") && s.ends_with("\"]") {
748                            s[2..s.len() - 2].to_string()
749                        } else {
750                            s
751                        };
752                        Expression::Literal(Literal::String(cleaned))
753                    }
754                    other => other,
755                };
756                Ok(Expression::Function(Box::new(Function::new(
757                    "JSON_EXTRACT_PATH".to_string(),
758                    vec![*je.this, path],
759                ))))
760            }
761
762            // TRIM(str, chars) -> TRIM(chars FROM str) for PostgreSQL SQL standard syntax
763            Expression::Trim(t) if !t.sql_standard_syntax && t.characters.is_some() => {
764                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
765                    this: t.this,
766                    characters: t.characters,
767                    position: t.position,
768                    sql_standard_syntax: true,
769                    position_explicit: t.position_explicit,
770                })))
771            }
772
773            // b'a' -> CAST(e'a' AS BYTEA) for PostgreSQL
774            Expression::Literal(Literal::ByteString(s)) => {
775                Ok(Expression::Cast(Box::new(Cast {
776                    this: Expression::Literal(Literal::EscapeString(s)),
777                    to: DataType::VarBinary { length: None },
778                    trailing_comments: Vec::new(),
779                    double_colon_syntax: false,
780                    format: None,
781                    default: None,
782                })))
783            }
784
785            // Pass through everything else
786            _ => Ok(expr),
787        }
788    }
789}
790
791impl PostgresDialect {
792    /// Check if a JSON path expression is "simple" (string literal or non-negative integer)
793    /// Simple paths can use arrow syntax (->>) in PostgreSQL
794    /// Complex paths (like negative indices) should use JSON_EXTRACT_PATH_TEXT function
795    fn is_simple_json_path(path: &Expression) -> bool {
796        match path {
797            // String literals are always simple
798            Expression::Literal(Literal::String(_)) => true,
799            // Non-negative integer literals are simple
800            Expression::Literal(Literal::Number(n)) => {
801                // Check if it's non-negative
802                !n.starts_with('-')
803            }
804            // JSONPath expressions are simple (they're already parsed paths)
805            Expression::JSONPath(_) => true,
806            // Everything else (Neg, function calls, etc.) is complex
807            _ => false,
808        }
809    }
810
811    /// Transform data types according to PostgreSQL TYPE_MAPPING
812    fn transform_data_type(&self, dt: DataType) -> Result<Expression> {
813        let transformed = match dt {
814            // TINYINT -> SMALLINT
815            DataType::TinyInt { .. } => DataType::SmallInt { length: None },
816
817            // FLOAT -> DOUBLE PRECISION (Python sqlglot tokenizes FLOAT as DOUBLE)
818            DataType::Float { .. } => DataType::Custom {
819                name: "DOUBLE PRECISION".to_string(),
820            },
821
822            // DOUBLE -> DOUBLE PRECISION
823            DataType::Double { .. } => DataType::Custom {
824                name: "DOUBLE PRECISION".to_string(),
825            },
826
827            // BINARY -> BYTEA
828            DataType::Binary { .. } => DataType::Custom {
829                name: "BYTEA".to_string(),
830            },
831
832            // VARBINARY -> BYTEA
833            DataType::VarBinary { .. } => DataType::Custom {
834                name: "BYTEA".to_string(),
835            },
836
837            // BLOB -> BYTEA
838            DataType::Blob => DataType::Custom {
839                name: "BYTEA".to_string(),
840            },
841
842            // Custom type normalizations
843            DataType::Custom { ref name } => {
844                let upper = name.to_uppercase();
845                match upper.as_str() {
846                    // INT8 -> BIGINT (PostgreSQL alias)
847                    "INT8" => DataType::BigInt { length: None },
848                    // FLOAT8 -> DOUBLE PRECISION (PostgreSQL alias)
849                    "FLOAT8" => DataType::Custom {
850                        name: "DOUBLE PRECISION".to_string(),
851                    },
852                    // FLOAT4 -> REAL (PostgreSQL alias)
853                    "FLOAT4" => DataType::Custom {
854                        name: "REAL".to_string(),
855                    },
856                    // INT4 -> INTEGER (PostgreSQL alias)
857                    "INT4" => DataType::Int { length: None, integer_spelling: false },
858                    // INT2 -> SMALLINT (PostgreSQL alias)
859                    "INT2" => DataType::SmallInt { length: None },
860                    _ => dt,
861                }
862            }
863
864            // Keep all other types as-is
865            other => other,
866        };
867        Ok(Expression::DataType(transformed))
868    }
869
870    fn transform_function(&self, f: Function) -> Result<Expression> {
871        let name_upper = f.name.to_uppercase();
872        match name_upper.as_str() {
873            // IFNULL -> COALESCE
874            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
875                expressions: f.args,
876            }))),
877
878            // NVL -> COALESCE
879            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
880                expressions: f.args,
881            }))),
882
883            // ISNULL (SQL Server) -> COALESCE
884            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
885                expressions: f.args,
886            }))),
887
888            // GROUP_CONCAT -> STRING_AGG in PostgreSQL
889            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
890                Function::new("STRING_AGG".to_string(), f.args),
891            ))),
892
893            // SUBSTR -> SUBSTRING (standard SQL)
894            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
895                "SUBSTRING".to_string(),
896                f.args,
897            )))),
898
899            // RAND -> RANDOM in PostgreSQL
900            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
901
902            // CEILING -> CEIL (both work in PostgreSQL, but CEIL is preferred)
903            "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
904                this: f.args.into_iter().next().unwrap(),
905                decimals: None,
906                to: None,
907            }))),
908
909            // LEN -> LENGTH in PostgreSQL
910            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
911                this: f.args.into_iter().next().unwrap(),
912                original_name: None,
913            }))),
914
915            // CHAR_LENGTH -> LENGTH in PostgreSQL
916            "CHAR_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
917                this: f.args.into_iter().next().unwrap(),
918                original_name: None,
919            }))),
920
921            // CHARACTER_LENGTH -> LENGTH in PostgreSQL
922            "CHARACTER_LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc {
923                this: f.args.into_iter().next().unwrap(),
924                original_name: None,
925            }))),
926
927            // CHARINDEX -> POSITION in PostgreSQL
928            // CHARINDEX(substring, string) -> POSITION(substring IN string)
929            "CHARINDEX" if f.args.len() >= 2 => {
930                let mut args = f.args;
931                let substring = args.remove(0);
932                let string = args.remove(0);
933                Ok(Expression::Position(Box::new(
934                    crate::expressions::PositionFunc {
935                        substring,
936                        string,
937                        start: args.pop(),
938                    },
939                )))
940            }
941
942            // GETDATE -> CURRENT_TIMESTAMP in PostgreSQL
943            "GETDATE" => Ok(Expression::CurrentTimestamp(
944                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
945            )),
946
947            // SYSDATETIME -> CURRENT_TIMESTAMP in PostgreSQL
948            "SYSDATETIME" => Ok(Expression::CurrentTimestamp(
949                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
950            )),
951
952            // NOW -> CURRENT_TIMESTAMP in PostgreSQL (NOW() is also valid)
953            "NOW" => Ok(Expression::CurrentTimestamp(
954                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
955            )),
956
957            // NEWID -> GEN_RANDOM_UUID in PostgreSQL
958            "NEWID" => Ok(Expression::Function(Box::new(Function::new(
959                "GEN_RANDOM_UUID".to_string(),
960                vec![],
961            )))),
962
963            // UUID -> GEN_RANDOM_UUID in PostgreSQL
964            "UUID" => Ok(Expression::Function(Box::new(Function::new(
965                "GEN_RANDOM_UUID".to_string(),
966                vec![],
967            )))),
968
969            // UNNEST is native to PostgreSQL
970            "UNNEST" => Ok(Expression::Function(Box::new(f))),
971
972            // GENERATE_SERIES is native to PostgreSQL
973            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(f))),
974
975            // SHA256 -> SHA256 in PostgreSQL (via pgcrypto extension)
976            "SHA256" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
977                "SHA256".to_string(),
978                f.args,
979            )))),
980
981            // SHA2 -> SHA256/SHA512 based on length argument
982            "SHA2" if f.args.len() == 2 => {
983                // SHA2(data, length) -> SHA256/SHA384/SHA512
984                let args = f.args;
985                let data = args[0].clone();
986                // Default to SHA256 - would need runtime inspection for exact mapping
987                Ok(Expression::Function(Box::new(Function::new(
988                    "SHA256".to_string(),
989                    vec![data],
990                ))))
991            }
992
993            // LEVENSHTEIN is native to PostgreSQL (fuzzystrmatch extension)
994            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
995
996            // EDITDISTANCE -> LEVENSHTEIN_LESS_EQUAL (with max distance) or LEVENSHTEIN
997            "EDITDISTANCE" if f.args.len() == 3 => Ok(Expression::Function(Box::new(Function::new(
998                "LEVENSHTEIN_LESS_EQUAL".to_string(),
999                f.args,
1000            )))),
1001            "EDITDISTANCE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
1002                "LEVENSHTEIN".to_string(),
1003                f.args,
1004            )))),
1005
1006            // TRIM(value, chars) -> TRIM(chars FROM value) for Postgres
1007            "TRIM" if f.args.len() == 2 => {
1008                let value = f.args[0].clone();
1009                let chars = f.args[1].clone();
1010                Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
1011                    this: value,
1012                    characters: Some(chars),
1013                    position: crate::expressions::TrimPosition::Both,
1014                    sql_standard_syntax: true,
1015                    position_explicit: false,
1016                })))
1017            }
1018
1019            // DATEDIFF(unit, start, end) -> PostgreSQL EXTRACT/AGE patterns
1020            "DATEDIFF" if f.args.len() >= 2 => {
1021                let mut args = f.args;
1022                if args.len() == 2 {
1023                    // 2-arg form: DATEDIFF(start, end) -> AGE(start, end)
1024                    let first = args.remove(0);
1025                    let second = args.remove(0);
1026                    Ok(Expression::Function(Box::new(Function::new(
1027                        "AGE".to_string(),
1028                        vec![first, second],
1029                    ))))
1030                } else {
1031                    // 3-arg form: DATEDIFF(unit, start, end)
1032                    let unit_expr = args.remove(0);
1033                    let start = args.remove(0);
1034                    let end_expr = args.remove(0);
1035
1036                    // Extract unit name from identifier or column
1037                    let unit_name = match &unit_expr {
1038                        Expression::Identifier(id) => id.name.to_uppercase(),
1039                        Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
1040                        _ => "DAY".to_string(),
1041                    };
1042
1043                    // Helper: CAST(expr AS TIMESTAMP)
1044                    let cast_ts = |e: Expression| -> Expression {
1045                        Expression::Cast(Box::new(Cast {
1046                            this: e,
1047                            to: DataType::Timestamp { precision: None, timezone: false },
1048                            trailing_comments: Vec::new(),
1049                            double_colon_syntax: false,
1050                            format: None,
1051                            default: None,
1052                        }))
1053                    };
1054
1055                    // Helper: CAST(expr AS BIGINT)
1056                    let cast_bigint = |e: Expression| -> Expression {
1057                        Expression::Cast(Box::new(Cast {
1058                            this: e,
1059                            to: DataType::BigInt { length: None },
1060                            trailing_comments: Vec::new(),
1061                            double_colon_syntax: false,
1062                            format: None,
1063                            default: None,
1064                        }))
1065                    };
1066
1067                    let end_ts = cast_ts(end_expr.clone());
1068                    let start_ts = cast_ts(start.clone());
1069
1070                    // Helper: end_ts - start_ts
1071                    let ts_diff = || -> Expression {
1072                        Expression::Sub(Box::new(BinaryOp::new(
1073                            cast_ts(end_expr.clone()),
1074                            cast_ts(start.clone()),
1075                        )))
1076                    };
1077
1078                    // Helper: AGE(end_ts, start_ts)
1079                    let age_call = || -> Expression {
1080                        Expression::Function(Box::new(Function::new(
1081                            "AGE".to_string(),
1082                            vec![cast_ts(end_expr.clone()), cast_ts(start.clone())],
1083                        )))
1084                    };
1085
1086                    // Helper: EXTRACT(field FROM expr)
1087                    let extract = |field: DateTimeField, from: Expression| -> Expression {
1088                        Expression::Extract(Box::new(ExtractFunc {
1089                            this: from,
1090                            field,
1091                        }))
1092                    };
1093
1094                    // Helper: number literal
1095                    let num = |n: i64| -> Expression {
1096                        Expression::Literal(Literal::Number(n.to_string()))
1097                    };
1098
1099                    // Use Custom DateTimeField for lowercase output (PostgreSQL convention)
1100                    let epoch_field = DateTimeField::Custom("epoch".to_string());
1101
1102                    let result = match unit_name.as_str() {
1103                        "MICROSECOND" => {
1104                            // CAST(EXTRACT(epoch FROM end_ts - start_ts) * 1000000 AS BIGINT)
1105                            let epoch = extract(epoch_field, ts_diff());
1106                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000000)))))
1107                        }
1108                        "MILLISECOND" => {
1109                            let epoch = extract(epoch_field, ts_diff());
1110                            cast_bigint(Expression::Mul(Box::new(BinaryOp::new(epoch, num(1000)))))
1111                        }
1112                        "SECOND" => {
1113                            let epoch = extract(epoch_field, ts_diff());
1114                            cast_bigint(epoch)
1115                        }
1116                        "MINUTE" => {
1117                            let epoch = extract(epoch_field, ts_diff());
1118                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(60)))))
1119                        }
1120                        "HOUR" => {
1121                            let epoch = extract(epoch_field, ts_diff());
1122                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(3600)))))
1123                        }
1124                        "DAY" => {
1125                            let epoch = extract(epoch_field, ts_diff());
1126                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(epoch, num(86400)))))
1127                        }
1128                        "WEEK" => {
1129                            // CAST(EXTRACT(days FROM (end_ts - start_ts)) / 7 AS BIGINT)
1130                            let diff_parens = Expression::Paren(Box::new(Paren {
1131                                this: ts_diff(),
1132                                trailing_comments: Vec::new(),
1133                            }));
1134                            let days = extract(DateTimeField::Custom("days".to_string()), diff_parens);
1135                            cast_bigint(Expression::Div(Box::new(BinaryOp::new(days, num(7)))))
1136                        }
1137                        "MONTH" => {
1138                            // CAST(EXTRACT(year FROM AGE(...)) * 12 + EXTRACT(month FROM AGE(...)) AS BIGINT)
1139                            let year_part = extract(DateTimeField::Custom("year".to_string()), age_call());
1140                            let month_part = extract(DateTimeField::Custom("month".to_string()), age_call());
1141                            let year_months = Expression::Mul(Box::new(BinaryOp::new(year_part, num(12))));
1142                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(year_months, month_part))))
1143                        }
1144                        "QUARTER" => {
1145                            // CAST(EXTRACT(year FROM AGE(...)) * 4 + EXTRACT(month FROM AGE(...)) / 3 AS BIGINT)
1146                            let year_part = extract(DateTimeField::Custom("year".to_string()), age_call());
1147                            let month_part = extract(DateTimeField::Custom("month".to_string()), age_call());
1148                            let year_quarters = Expression::Mul(Box::new(BinaryOp::new(year_part, num(4))));
1149                            let month_quarters = Expression::Div(Box::new(BinaryOp::new(month_part, num(3))));
1150                            cast_bigint(Expression::Add(Box::new(BinaryOp::new(year_quarters, month_quarters))))
1151                        }
1152                        "YEAR" => {
1153                            // CAST(EXTRACT(year FROM AGE(...)) AS BIGINT)
1154                            cast_bigint(extract(DateTimeField::Custom("year".to_string()), age_call()))
1155                        }
1156                        _ => {
1157                            // Fallback: simple AGE
1158                            Expression::Function(Box::new(Function::new(
1159                                "AGE".to_string(),
1160                                vec![end_ts, start_ts],
1161                            )))
1162                        }
1163                    };
1164                    Ok(result)
1165                }
1166            }
1167
1168            // TIMESTAMPDIFF -> AGE or EXTRACT pattern
1169            "TIMESTAMPDIFF" if f.args.len() >= 3 => {
1170                let mut args = f.args;
1171                let _unit = args.remove(0); // Unit (ignored, AGE returns full interval)
1172                let start = args.remove(0);
1173                let end = args.remove(0);
1174                Ok(Expression::Function(Box::new(Function::new(
1175                    "AGE".to_string(),
1176                    vec![end, start],
1177                ))))
1178            }
1179
1180            // FROM_UNIXTIME -> TO_TIMESTAMP
1181            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(Function::new(
1182                "TO_TIMESTAMP".to_string(),
1183                f.args,
1184            )))),
1185
1186            // UNIX_TIMESTAMP -> EXTRACT(EPOCH FROM ...)
1187            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
1188                let arg = f.args.into_iter().next().unwrap();
1189                Ok(Expression::Function(Box::new(Function::new(
1190                    "DATE_PART".to_string(),
1191                    vec![Expression::string("epoch"), arg],
1192                ))))
1193            }
1194
1195            // UNIX_TIMESTAMP() with no args -> EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
1196            "UNIX_TIMESTAMP" if f.args.is_empty() => {
1197                Ok(Expression::Function(Box::new(Function::new(
1198                    "DATE_PART".to_string(),
1199                    vec![
1200                        Expression::string("epoch"),
1201                        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
1202                            precision: None,
1203                            sysdate: false,
1204                        }),
1205                    ],
1206                ))))
1207            }
1208
1209            // DATEADD -> date + interval pattern
1210            "DATEADD" if f.args.len() == 3 => {
1211                // DATEADD(unit, count, date) -> date + interval 'count unit'
1212                // This is a simplified version - full impl would construct proper interval
1213                let mut args = f.args;
1214                let _unit = args.remove(0);
1215                let count = args.remove(0);
1216                let date = args.remove(0);
1217                Ok(Expression::Add(Box::new(BinaryOp {
1218                    left: date,
1219                    right: count,
1220                    left_comments: Vec::new(),
1221                    operator_comments: Vec::new(),
1222                    trailing_comments: Vec::new(),
1223                })))
1224            }
1225
1226            // INSTR -> POSITION (simplified)
1227            "INSTR" if f.args.len() >= 2 => {
1228                let mut args = f.args;
1229                let string = args.remove(0);
1230                let substring = args.remove(0);
1231                Ok(Expression::Position(Box::new(
1232                    crate::expressions::PositionFunc {
1233                        substring,
1234                        string,
1235                        start: args.pop(),
1236                    },
1237                )))
1238            }
1239
1240            // CONCAT_WS is native to PostgreSQL
1241            "CONCAT_WS" => Ok(Expression::Function(Box::new(f))),
1242
1243            // REGEXP_REPLACE: pass through without adding 'g' flag
1244            // The 'g' flag handling is managed by cross_dialect_normalize based on source dialect's default behavior
1245            "REGEXP_REPLACE" if f.args.len() == 3 || f.args.len() == 4 => {
1246                Ok(Expression::Function(Box::new(f)))
1247            }
1248            // 6 args from Snowflake: (subject, pattern, replacement, position, occurrence, params)
1249            // If occurrence is 0 (global), append 'g' to flags
1250            "REGEXP_REPLACE" if f.args.len() == 6 => {
1251                let is_global = match &f.args[4] {
1252                    Expression::Literal(crate::expressions::Literal::Number(n)) => n == "0",
1253                    _ => false,
1254                };
1255                if is_global {
1256                    let subject = f.args[0].clone();
1257                    let pattern = f.args[1].clone();
1258                    let replacement = f.args[2].clone();
1259                    let position = f.args[3].clone();
1260                    let occurrence = f.args[4].clone();
1261                    let params = &f.args[5];
1262                    let mut flags = if let Expression::Literal(crate::expressions::Literal::String(s)) = params {
1263                        s.clone()
1264                    } else {
1265                        String::new()
1266                    };
1267                    if !flags.contains('g') {
1268                        flags.push('g');
1269                    }
1270                    Ok(Expression::Function(Box::new(Function::new(
1271                        "REGEXP_REPLACE".to_string(),
1272                        vec![subject, pattern, replacement, position, occurrence, Expression::Literal(crate::expressions::Literal::String(flags))],
1273                    ))))
1274                } else {
1275                    Ok(Expression::Function(Box::new(f)))
1276                }
1277            }
1278            // Default: pass through
1279            "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
1280
1281            // Pass through everything else
1282            _ => Ok(Expression::Function(Box::new(f))),
1283        }
1284    }
1285
1286    fn transform_aggregate_function(
1287        &self,
1288        f: Box<crate::expressions::AggregateFunction>,
1289    ) -> Result<Expression> {
1290        let name_upper = f.name.to_uppercase();
1291        match name_upper.as_str() {
1292            // COUNT_IF -> SUM(CASE WHEN...)
1293            "COUNT_IF" if !f.args.is_empty() => {
1294                let condition = f.args.into_iter().next().unwrap();
1295                let case_expr = Expression::Case(Box::new(Case {
1296                    operand: None,
1297                    whens: vec![(condition, Expression::number(1))],
1298                    else_: Some(Expression::number(0)),
1299                }));
1300                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1301                    this: case_expr,
1302                    distinct: f.distinct,
1303                    filter: f.filter,
1304                    order_by: Vec::new(),
1305                    name: None,
1306                    limit: None,
1307                })))
1308            }
1309
1310            // GROUP_CONCAT -> STRING_AGG
1311            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
1312                Function::new("STRING_AGG".to_string(), f.args),
1313            ))),
1314
1315            // STDEV -> STDDEV in PostgreSQL
1316            "STDEV" if !f.args.is_empty() => Ok(Expression::Stddev(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1317                this: f.args.into_iter().next().unwrap(),
1318                distinct: f.distinct,
1319                filter: f.filter,
1320                order_by: Vec::new(),
1321                name: None,
1322                limit: None,
1323            }))),
1324
1325            // STDEVP -> STDDEV_POP in PostgreSQL
1326            "STDEVP" if !f.args.is_empty() => Ok(Expression::StddevPop(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1327                this: f.args.into_iter().next().unwrap(),
1328                distinct: f.distinct,
1329                filter: f.filter,
1330                order_by: Vec::new(),
1331                name: None,
1332                limit: None,
1333            }))),
1334
1335            // VAR -> VAR_SAMP in PostgreSQL
1336            "VAR" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1337                this: f.args.into_iter().next().unwrap(),
1338                distinct: f.distinct,
1339                filter: f.filter,
1340                order_by: Vec::new(),
1341                name: None,
1342                limit: None,
1343            }))),
1344
1345            // VARP -> VAR_POP in PostgreSQL
1346            "VARP" if !f.args.is_empty() => Ok(Expression::VarPop(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1347                this: f.args.into_iter().next().unwrap(),
1348                distinct: f.distinct,
1349                filter: f.filter,
1350                order_by: Vec::new(),
1351                name: None,
1352                limit: None,
1353            }))),
1354
1355            // BIT_AND is native to PostgreSQL
1356            "BIT_AND" => Ok(Expression::AggregateFunction(f)),
1357
1358            // BIT_OR is native to PostgreSQL
1359            "BIT_OR" => Ok(Expression::AggregateFunction(f)),
1360
1361            // BIT_XOR is native to PostgreSQL
1362            "BIT_XOR" => Ok(Expression::AggregateFunction(f)),
1363
1364            // BOOL_AND is native to PostgreSQL
1365            "BOOL_AND" => Ok(Expression::AggregateFunction(f)),
1366
1367            // BOOL_OR is native to PostgreSQL
1368            "BOOL_OR" => Ok(Expression::AggregateFunction(f)),
1369
1370            // VARIANCE -> VAR_SAMP in PostgreSQL
1371            "VARIANCE" if !f.args.is_empty() => Ok(Expression::VarSamp(Box::new(AggFunc { ignore_nulls: None, having_max: None,
1372                this: f.args.into_iter().next().unwrap(),
1373                distinct: f.distinct,
1374                filter: f.filter,
1375                order_by: Vec::new(),
1376                name: None,
1377                limit: None,
1378            }))),
1379
1380            // LOGICAL_OR -> BOOL_OR in PostgreSQL
1381            "LOGICAL_OR" if !f.args.is_empty() => {
1382                let mut new_agg = f.clone();
1383                new_agg.name = "BOOL_OR".to_string();
1384                Ok(Expression::AggregateFunction(new_agg))
1385            }
1386
1387            // LOGICAL_AND -> BOOL_AND in PostgreSQL
1388            "LOGICAL_AND" if !f.args.is_empty() => {
1389                let mut new_agg = f.clone();
1390                new_agg.name = "BOOL_AND".to_string();
1391                Ok(Expression::AggregateFunction(new_agg))
1392            }
1393
1394            // Pass through everything else
1395            _ => Ok(Expression::AggregateFunction(f)),
1396        }
1397    }
1398}
1399
1400#[cfg(test)]
1401mod tests {
1402    use super::*;
1403    use crate::dialects::Dialect;
1404
1405    fn transpile_to_postgres(sql: &str) -> String {
1406        let dialect = Dialect::get(DialectType::Generic);
1407        let result = dialect
1408            .transpile_to(sql, DialectType::PostgreSQL)
1409            .expect("Transpile failed");
1410        result[0].clone()
1411    }
1412
1413    #[test]
1414    fn test_ifnull_to_coalesce() {
1415        let result = transpile_to_postgres("SELECT IFNULL(a, b)");
1416        assert!(
1417            result.contains("COALESCE"),
1418            "Expected COALESCE, got: {}",
1419            result
1420        );
1421    }
1422
1423    #[test]
1424    fn test_nvl_to_coalesce() {
1425        let result = transpile_to_postgres("SELECT NVL(a, b)");
1426        assert!(
1427            result.contains("COALESCE"),
1428            "Expected COALESCE, got: {}",
1429            result
1430        );
1431    }
1432
1433    #[test]
1434    fn test_rand_to_random() {
1435        let result = transpile_to_postgres("SELECT RAND()");
1436        assert!(
1437            result.contains("RANDOM"),
1438            "Expected RANDOM, got: {}",
1439            result
1440        );
1441    }
1442
1443    #[test]
1444    fn test_basic_select() {
1445        let result = transpile_to_postgres("SELECT a, b FROM users WHERE id = 1");
1446        assert!(result.contains("SELECT"));
1447        assert!(result.contains("FROM users"));
1448    }
1449
1450    #[test]
1451    fn test_len_to_length() {
1452        let result = transpile_to_postgres("SELECT LEN(name)");
1453        assert!(
1454            result.contains("LENGTH"),
1455            "Expected LENGTH, got: {}",
1456            result
1457        );
1458    }
1459
1460    #[test]
1461    fn test_getdate_to_current_timestamp() {
1462        let result = transpile_to_postgres("SELECT GETDATE()");
1463        assert!(
1464            result.contains("CURRENT_TIMESTAMP"),
1465            "Expected CURRENT_TIMESTAMP, got: {}",
1466            result
1467        );
1468    }
1469
1470    #[test]
1471    fn test_substr_to_substring() {
1472        let result = transpile_to_postgres("SELECT SUBSTR(name, 1, 3)");
1473        assert!(
1474            result.contains("SUBSTRING"),
1475            "Expected SUBSTRING, got: {}",
1476            result
1477        );
1478    }
1479
1480    #[test]
1481    fn test_group_concat_to_string_agg() {
1482        let result = transpile_to_postgres("SELECT GROUP_CONCAT(name)");
1483        assert!(
1484            result.contains("STRING_AGG"),
1485            "Expected STRING_AGG, got: {}",
1486            result
1487        );
1488    }
1489
1490    #[test]
1491    fn test_double_quote_identifiers() {
1492        // PostgreSQL uses double quotes for identifiers
1493        let dialect = PostgresDialect;
1494        let config = dialect.generator_config();
1495        assert_eq!(config.identifier_quote, '"');
1496    }
1497
1498    #[test]
1499    fn test_char_length_to_length() {
1500        let result = transpile_to_postgres("SELECT CHAR_LENGTH(name)");
1501        assert!(
1502            result.contains("LENGTH"),
1503            "Expected LENGTH, got: {}",
1504            result
1505        );
1506    }
1507
1508    #[test]
1509    fn test_character_length_to_length() {
1510        let result = transpile_to_postgres("SELECT CHARACTER_LENGTH(name)");
1511        assert!(
1512            result.contains("LENGTH"),
1513            "Expected LENGTH, got: {}",
1514            result
1515        );
1516    }
1517
1518    /// Helper for PostgreSQL identity tests (parse and regenerate with PostgreSQL dialect)
1519    fn identity_postgres(sql: &str) -> String {
1520        let dialect = Dialect::get(DialectType::PostgreSQL);
1521        let exprs = dialect.parse(sql).expect("Parse failed");
1522        let transformed = dialect.transform(exprs[0].clone()).expect("Transform failed");
1523        dialect.generate(&transformed).expect("Generate failed")
1524    }
1525
1526    #[test]
1527    fn test_json_extract_with_column_path() {
1528        // When the path is a column reference (not a literal), should use function form
1529        let result = identity_postgres("json_data.data -> field_ids.field_id");
1530        assert!(
1531            result.contains("JSON_EXTRACT_PATH"),
1532            "Expected JSON_EXTRACT_PATH for column path, got: {}",
1533            result
1534        );
1535    }
1536
1537    #[test]
1538    fn test_json_extract_scalar_with_negative_index() {
1539        // When the path is a negative index, should use JSON_EXTRACT_PATH_TEXT function
1540        let result = identity_postgres("x::JSON -> 'duration' ->> -1");
1541        assert!(
1542            result.contains("JSON_EXTRACT_PATH_TEXT"),
1543            "Expected JSON_EXTRACT_PATH_TEXT for negative index, got: {}",
1544            result
1545        );
1546        // The first -> should still be arrow syntax since 'duration' is a string literal
1547        assert!(
1548            result.contains("->"),
1549            "Expected -> for string literal path, got: {}",
1550            result
1551        );
1552    }
1553
1554    #[test]
1555    fn test_json_extract_with_string_literal() {
1556        // When the path is a string literal, should keep arrow syntax
1557        let result = identity_postgres("data -> 'key'");
1558        assert!(
1559            result.contains("->"),
1560            "Expected -> for string literal path, got: {}",
1561            result
1562        );
1563        assert!(
1564            !result.contains("JSON_EXTRACT_PATH"),
1565            "Should NOT use function form for string literal, got: {}",
1566            result
1567        );
1568    }
1569}