Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Marker type for the Presto SQL dialect.
///
/// The type carries no state. Presto-specific behavior comes from its
/// [`DialectImpl`] implementation and from the inherent helper functions
/// defined on it in this module.
pub struct PrestoDialect;
17
18impl DialectImpl for PrestoDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Presto
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Presto uses double quotes for identifiers
26        config.identifiers.insert('"', '"');
27        // Presto does NOT support nested comments
28        config.nested_comments = false;
29        // Presto does NOT support QUALIFY - it's a valid identifier
30        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
31        config.keywords.remove("QUALIFY");
32        config
33    }
34
35    fn generator_config(&self) -> GeneratorConfig {
36        use crate::generator::IdentifierQuoteStyle;
37        GeneratorConfig {
38            identifier_quote: '"',
39            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
40            dialect: Some(DialectType::Presto),
41            limit_only_literals: true,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Presto
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
51                original_name: None,
52                expressions: vec![f.this, f.expression],
53                inferred_type: None,
54            }))),
55
56            // NVL -> COALESCE in Presto
57            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
58                original_name: None,
59                expressions: vec![f.this, f.expression],
60                inferred_type: None,
61            }))),
62
63            // TryCast stays as TryCast (Presto supports TRY_CAST)
64            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
65
66            // SafeCast -> TRY_CAST in Presto
67            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
68
69            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
70            Expression::ILike(op) => {
71                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
72                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
73                Ok(Expression::Like(Box::new(LikeOp {
74                    left: lower_left,
75                    right: lower_right,
76                    escape: op.escape,
77                    quantifier: op.quantifier.clone(),
78                    inferred_type: None,
79                })))
80            }
81
82            // CountIf is native in Presto (keep as-is)
83            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
84
85            // EXPLODE -> UNNEST in Presto
86            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
87                crate::expressions::UnnestFunc {
88                    this: f.this,
89                    expressions: Vec::new(),
90                    with_ordinality: false,
91                    alias: None,
92                    offset_alias: None,
93                },
94            ))),
95
96            // ExplodeOuter -> UNNEST in Presto
97            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
98                crate::expressions::UnnestFunc {
99                    this: f.this,
100                    expressions: Vec::new(),
101                    with_ordinality: false,
102                    alias: None,
103                    offset_alias: None,
104                },
105            ))),
106
107            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
108            Expression::StringAgg(f) => {
109                let array_agg = Expression::Function(Box::new(Function::new(
110                    "ARRAY_AGG".to_string(),
111                    vec![f.this.clone()],
112                )));
113                let mut join_args = vec![array_agg];
114                if let Some(sep) = f.separator {
115                    join_args.push(sep);
116                }
117                Ok(Expression::Function(Box::new(Function::new(
118                    "ARRAY_JOIN".to_string(),
119                    join_args,
120                ))))
121            }
122
123            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
124            Expression::GroupConcat(f) => {
125                let array_agg = Expression::Function(Box::new(Function::new(
126                    "ARRAY_AGG".to_string(),
127                    vec![f.this.clone()],
128                )));
129                let mut join_args = vec![array_agg];
130                if let Some(sep) = f.separator {
131                    join_args.push(sep);
132                }
133                Ok(Expression::Function(Box::new(Function::new(
134                    "ARRAY_JOIN".to_string(),
135                    join_args,
136                ))))
137            }
138
139            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
140            Expression::ListAgg(f) => {
141                let array_agg = Expression::Function(Box::new(Function::new(
142                    "ARRAY_AGG".to_string(),
143                    vec![f.this.clone()],
144                )));
145                let mut join_args = vec![array_agg];
146                if let Some(sep) = f.separator {
147                    join_args.push(sep);
148                }
149                Ok(Expression::Function(Box::new(Function::new(
150                    "ARRAY_JOIN".to_string(),
151                    join_args,
152                ))))
153            }
154
155            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
156
157            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
158            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
159                let path = match *e.expression {
160                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
161                        let Literal::String(s) = lit.as_ref() else { unreachable!() };
162                        let normalized = if s.starts_with('$') {
163                            s.clone()
164                        } else if s.starts_with('[') {
165                            format!("${}", s)
166                        } else {
167                            format!("$.{}", s)
168                        };
169                        Expression::Literal(Box::new(Literal::String(normalized)))
170                    }
171                    other => other,
172                };
173                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
174                    this: *e.this,
175                    path,
176                    returning: None,
177                    arrow_syntax: false,
178                    hash_arrow_syntax: false,
179                    wrapper_option: None,
180                    quotes_option: None,
181                    on_scalar_string: false,
182                    on_error: None,
183                })))
184            }
185
186            // Generic function transformations
187            Expression::Function(f) => self.transform_function(*f),
188
189            // Generic aggregate function transformations
190            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
191
192            // Cast transformations
193            Expression::Cast(c) => self.transform_cast(*c),
194
195            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
196            // to ensure float division (only when left isn't already a float cast)
197            Expression::Div(mut op) => {
198                if !Self::is_float_cast(&op.left) {
199                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
200                        this: op.left,
201                        to: DataType::Double {
202                            precision: None,
203                            scale: None,
204                        },
205                        trailing_comments: Vec::new(),
206                        double_colon_syntax: false,
207                        format: None,
208                        default: None,
209                        inferred_type: None,
210                    }));
211                }
212                Ok(Expression::Div(op))
213            }
214
215            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
216            Expression::IntDiv(f) => {
217                let cast_x = Expression::Cast(Box::new(Cast {
218                    this: f.this,
219                    to: crate::expressions::DataType::Double {
220                        precision: None,
221                        scale: None,
222                    },
223                    trailing_comments: Vec::new(),
224                    double_colon_syntax: false,
225                    format: None,
226                    default: None,
227                    inferred_type: None,
228                }));
229                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
230                Ok(Expression::Cast(Box::new(Cast {
231                    this: div_expr,
232                    to: crate::expressions::DataType::Int {
233                        length: None,
234                        integer_spelling: true,
235                    },
236                    trailing_comments: Vec::new(),
237                    double_colon_syntax: false,
238                    format: None,
239                    default: None,
240                    inferred_type: None,
241                })))
242            }
243
244            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
245            Expression::Delete(mut d) => {
246                if d.alias.is_some() {
247                    d.alias = None;
248                    d.alias_explicit_as = false;
249                    // Unqualify all columns in the WHERE clause
250                    if let Some(ref mut where_clause) = d.where_clause {
251                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
252                    }
253                }
254                Ok(Expression::Delete(d))
255            }
256
257            // Pass through everything else
258            _ => Ok(expr),
259        }
260    }
261}
262
263impl PrestoDialect {
264    /// Recursively unqualify columns - remove table qualifiers from Column references
265    fn unqualify_columns(expr: Expression) -> Expression {
266        match expr {
267            Expression::Column(c) => {
268                if c.table.is_some() {
269                    Expression::boxed_column(Column {
270                        name: c.name,
271                        table: None,
272                        join_mark: c.join_mark,
273                        trailing_comments: c.trailing_comments,
274                        span: None,
275                        inferred_type: None,
276                    })
277                } else {
278                    Expression::Column(c)
279                }
280            }
281            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
282            Expression::Dot(d) => Expression::boxed_column(Column {
283                name: d.field,
284                table: None,
285                join_mark: false,
286                trailing_comments: Vec::new(),
287                span: None,
288                inferred_type: None,
289            }),
290            // Recursively walk common binary expression types
291            Expression::And(mut op) => {
292                op.left = Self::unqualify_columns(op.left);
293                op.right = Self::unqualify_columns(op.right);
294                Expression::And(op)
295            }
296            Expression::Or(mut op) => {
297                op.left = Self::unqualify_columns(op.left);
298                op.right = Self::unqualify_columns(op.right);
299                Expression::Or(op)
300            }
301            Expression::Eq(mut op) => {
302                op.left = Self::unqualify_columns(op.left);
303                op.right = Self::unqualify_columns(op.right);
304                Expression::Eq(op)
305            }
306            Expression::Neq(mut op) => {
307                op.left = Self::unqualify_columns(op.left);
308                op.right = Self::unqualify_columns(op.right);
309                Expression::Neq(op)
310            }
311            Expression::Gt(mut op) => {
312                op.left = Self::unqualify_columns(op.left);
313                op.right = Self::unqualify_columns(op.right);
314                Expression::Gt(op)
315            }
316            Expression::Lt(mut op) => {
317                op.left = Self::unqualify_columns(op.left);
318                op.right = Self::unqualify_columns(op.right);
319                Expression::Lt(op)
320            }
321            Expression::Gte(mut op) => {
322                op.left = Self::unqualify_columns(op.left);
323                op.right = Self::unqualify_columns(op.right);
324                Expression::Gte(op)
325            }
326            Expression::Lte(mut op) => {
327                op.left = Self::unqualify_columns(op.left);
328                op.right = Self::unqualify_columns(op.right);
329                Expression::Lte(op)
330            }
331            // Unary operators
332            Expression::Not(mut e) => {
333                e.this = Self::unqualify_columns(e.this);
334                Expression::Not(e)
335            }
336            // Predicates
337            Expression::In(mut i) => {
338                i.this = Self::unqualify_columns(i.this);
339                i.expressions = i
340                    .expressions
341                    .into_iter()
342                    .map(Self::unqualify_columns)
343                    .collect();
344                // Also recurse into subquery if present
345                if let Some(q) = i.query {
346                    i.query = Some(Self::unqualify_columns(q));
347                }
348                Expression::In(i)
349            }
350            Expression::IsNull(mut f) => {
351                f.this = Self::unqualify_columns(f.this);
352                Expression::IsNull(f)
353            }
354            Expression::Paren(mut p) => {
355                p.this = Self::unqualify_columns(p.this);
356                Expression::Paren(p)
357            }
358            Expression::Function(mut f) => {
359                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
360                Expression::Function(f)
361            }
362            // For subqueries (SELECT statements inside IN, etc), also unqualify
363            Expression::Select(mut s) => {
364                s.expressions = s
365                    .expressions
366                    .into_iter()
367                    .map(Self::unqualify_columns)
368                    .collect();
369                if let Some(ref mut w) = s.where_clause {
370                    w.this = Self::unqualify_columns(w.this.clone());
371                }
372                Expression::Select(s)
373            }
374            Expression::Subquery(mut sq) => {
375                sq.this = Self::unqualify_columns(sq.this);
376                Expression::Subquery(sq)
377            }
378            Expression::Alias(mut a) => {
379                a.this = Self::unqualify_columns(a.this);
380                Expression::Alias(a)
381            }
382            // Pass through other expressions unchanged
383            other => other,
384        }
385    }
386
387    /// Check if an expression is already a CAST to a float type
388    fn is_float_cast(expr: &Expression) -> bool {
389        if let Expression::Cast(cast) = expr {
390            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
391        } else {
392            false
393        }
394    }
395
396    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
397    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
398    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
399    pub fn oracle_to_presto_format(fmt: &str) -> String {
400        // Process character by character to avoid double-replacement issues
401        let chars: Vec<char> = fmt.chars().collect();
402        let mut result = String::new();
403        let mut i = 0;
404        while i < chars.len() {
405            let remaining = &fmt[i..];
406            if remaining.starts_with("yyyy") {
407                result.push_str("%Y");
408                i += 4;
409            } else if remaining.starts_with("yy") {
410                result.push_str("%y");
411                i += 2;
412            } else if remaining.starts_with("hh24") {
413                result.push_str("%H");
414                i += 4;
415            } else if remaining.starts_with("hh") {
416                result.push_str("%H");
417                i += 2;
418            } else if remaining.starts_with("mi") {
419                result.push_str("%i");
420                i += 2;
421            } else if remaining.starts_with("mm") {
422                result.push_str("%m");
423                i += 2;
424            } else if remaining.starts_with("dd") {
425                result.push_str("%d");
426                i += 2;
427            } else if remaining.starts_with("ss") {
428                result.push_str("%s");
429                i += 2;
430            } else {
431                result.push(chars[i]);
432                i += 1;
433            }
434        }
435        result
436    }
437
438    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
439    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
440    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
441    pub fn presto_to_java_format(fmt: &str) -> String {
442        fmt.replace("%Y", "yyyy")
443            .replace("%m", "MM")
444            .replace("%d", "dd")
445            .replace("%H", "HH")
446            .replace("%i", "mm")
447            .replace("%S", "ss")
448            .replace("%s", "ss")
449            .replace("%y", "yy")
450            .replace("%T", "HH:mm:ss")
451            .replace("%F", "yyyy-MM-dd")
452            .replace("%M", "MMMM")
453    }
454
455    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
456    pub fn normalize_presto_format(fmt: &str) -> String {
457        fmt.replace("%H:%i:%S", "%T").replace("%H:%i:%s", "%T")
458    }
459
460    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
461    pub fn presto_to_duckdb_format(fmt: &str) -> String {
462        fmt.replace("%i", "%M")
463            .replace("%s", "%S")
464            .replace("%T", "%H:%M:%S")
465    }
466
467    /// Convert Presto's C-style format to BigQuery format
468    pub fn presto_to_bigquery_format(fmt: &str) -> String {
469        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
470        // BigQuery uses %M for minutes (like DuckDB), not %i
471        let result = fmt
472            .replace("%Y-%m-%d", "%F")
473            .replace("%H:%i:%S", "%T")
474            .replace("%H:%i:%s", "%T")
475            .replace("%i", "%M")
476            .replace("%s", "%S");
477        result
478    }
479
480    /// Check if a Presto format string matches the default timestamp format
481    pub fn is_default_timestamp_format(fmt: &str) -> bool {
482        let normalized = Self::normalize_presto_format(fmt);
483        normalized == "%Y-%m-%d %T"
484            || normalized == "%Y-%m-%d %H:%i:%S"
485            || fmt == "%Y-%m-%d %H:%i:%S"
486            || fmt == "%Y-%m-%d %T"
487    }
488
489    /// Check if a Presto format string matches the default date format
490    pub fn is_default_date_format(fmt: &str) -> bool {
491        fmt == "%Y-%m-%d" || fmt == "%F"
492    }
493
494    fn transform_function(&self, f: Function) -> Result<Expression> {
495        let name_upper = f.name.to_uppercase();
496        match name_upper.as_str() {
497            // IFNULL -> COALESCE
498            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
499                original_name: None,
500                expressions: f.args,
501                inferred_type: None,
502            }))),
503
504            // NVL -> COALESCE
505            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
506                original_name: None,
507                expressions: f.args,
508                inferred_type: None,
509            }))),
510
511            // ISNULL -> COALESCE
512            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
513                original_name: None,
514                expressions: f.args,
515                inferred_type: None,
516            }))),
517
518            // GETDATE -> CURRENT_TIMESTAMP
519            "GETDATE" => Ok(Expression::CurrentTimestamp(
520                crate::expressions::CurrentTimestamp {
521                    precision: None,
522                    sysdate: false,
523                },
524            )),
525
526            // NOW -> CURRENT_TIMESTAMP
527            "NOW" => Ok(Expression::CurrentTimestamp(
528                crate::expressions::CurrentTimestamp {
529                    precision: None,
530                    sysdate: false,
531                },
532            )),
533
534            // RAND -> RANDOM in Presto (but it's actually RANDOM())
535            "RAND" => Ok(Expression::Function(Box::new(Function::new(
536                "RANDOM".to_string(),
537                vec![],
538            )))),
539
540            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
541            "GROUP_CONCAT" if !f.args.is_empty() => {
542                let mut args = f.args;
543                let first = args.remove(0);
544                let separator = args.pop();
545                let array_agg = Expression::Function(Box::new(Function::new(
546                    "ARRAY_AGG".to_string(),
547                    vec![first],
548                )));
549                let mut join_args = vec![array_agg];
550                if let Some(sep) = separator {
551                    join_args.push(sep);
552                }
553                Ok(Expression::Function(Box::new(Function::new(
554                    "ARRAY_JOIN".to_string(),
555                    join_args,
556                ))))
557            }
558
559            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
560            "STRING_AGG" if !f.args.is_empty() => {
561                let mut args = f.args;
562                let first = args.remove(0);
563                let separator = args.pop();
564                let array_agg = Expression::Function(Box::new(Function::new(
565                    "ARRAY_AGG".to_string(),
566                    vec![first],
567                )));
568                let mut join_args = vec![array_agg];
569                if let Some(sep) = separator {
570                    join_args.push(sep);
571                }
572                Ok(Expression::Function(Box::new(Function::new(
573                    "ARRAY_JOIN".to_string(),
574                    join_args,
575                ))))
576            }
577
578            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
579            "LISTAGG" if !f.args.is_empty() => {
580                let mut args = f.args;
581                let first = args.remove(0);
582                let separator = args.pop();
583                let array_agg = Expression::Function(Box::new(Function::new(
584                    "ARRAY_AGG".to_string(),
585                    vec![first],
586                )));
587                let mut join_args = vec![array_agg];
588                if let Some(sep) = separator {
589                    join_args.push(sep);
590                }
591                Ok(Expression::Function(Box::new(Function::new(
592                    "ARRAY_JOIN".to_string(),
593                    join_args,
594                ))))
595            }
596
597            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
598            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
599
600            // LEN -> LENGTH
601            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
602                f.args.into_iter().next().unwrap(),
603            )))),
604
605            // CHARINDEX -> STRPOS in Presto (with swapped args)
606            "CHARINDEX" if f.args.len() >= 2 => {
607                let mut args = f.args;
608                let substring = args.remove(0);
609                let string = args.remove(0);
610                // STRPOS(string, substring) - note: argument order is reversed
611                Ok(Expression::Function(Box::new(Function::new(
612                    "STRPOS".to_string(),
613                    vec![string, substring],
614                ))))
615            }
616
617            // INSTR -> STRPOS (with same argument order)
618            "INSTR" if f.args.len() >= 2 => {
619                let args = f.args;
620                // INSTR(string, substring) -> STRPOS(string, substring)
621                Ok(Expression::Function(Box::new(Function::new(
622                    "STRPOS".to_string(),
623                    args,
624                ))))
625            }
626
627            // LOCATE -> STRPOS in Presto (with swapped args)
628            "LOCATE" if f.args.len() >= 2 => {
629                let mut args = f.args;
630                let substring = args.remove(0);
631                let string = args.remove(0);
632                // LOCATE(substring, string) -> STRPOS(string, substring)
633                Ok(Expression::Function(Box::new(Function::new(
634                    "STRPOS".to_string(),
635                    vec![string, substring],
636                ))))
637            }
638
639            // ARRAY_LENGTH -> CARDINALITY in Presto
640            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
641                Function::new("CARDINALITY".to_string(), f.args),
642            ))),
643
644            // SIZE -> CARDINALITY in Presto
645            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
646                "CARDINALITY".to_string(),
647                f.args,
648            )))),
649
650            // ARRAY_CONTAINS -> CONTAINS in Presto
651            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
652                Function::new("CONTAINS".to_string(), f.args),
653            ))),
654
655            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
656            "TO_DATE" if !f.args.is_empty() => {
657                if f.args.len() == 1 {
658                    // Simple case: just cast to DATE
659                    Ok(Expression::Cast(Box::new(Cast {
660                        this: f.args.into_iter().next().unwrap(),
661                        to: DataType::Date,
662                        trailing_comments: Vec::new(),
663                        double_colon_syntax: false,
664                        format: None,
665                        default: None,
666                        inferred_type: None,
667                    })))
668                } else {
669                    // With format: use DATE_PARSE
670                    Ok(Expression::Function(Box::new(Function::new(
671                        "DATE_PARSE".to_string(),
672                        f.args,
673                    ))))
674                }
675            }
676
677            // TO_TIMESTAMP -> DATE_PARSE / CAST
678            "TO_TIMESTAMP" if !f.args.is_empty() => {
679                if f.args.len() == 1 {
680                    Ok(Expression::Cast(Box::new(Cast {
681                        this: f.args.into_iter().next().unwrap(),
682                        to: DataType::Timestamp {
683                            precision: None,
684                            timezone: false,
685                        },
686                        trailing_comments: Vec::new(),
687                        double_colon_syntax: false,
688                        format: None,
689                        default: None,
690                        inferred_type: None,
691                    })))
692                } else {
693                    Ok(Expression::Function(Box::new(Function::new(
694                        "DATE_PARSE".to_string(),
695                        f.args,
696                    ))))
697                }
698            }
699
700            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
701            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
702
703            // strftime -> DATE_FORMAT in Presto
704            "STRFTIME" if f.args.len() >= 2 => {
705                let mut args = f.args;
706                // strftime(format, date) -> DATE_FORMAT(date, format)
707                let format = args.remove(0);
708                let date = args.remove(0);
709                Ok(Expression::Function(Box::new(Function::new(
710                    "DATE_FORMAT".to_string(),
711                    vec![date, format],
712                ))))
713            }
714
715            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
716            "TO_CHAR" if f.args.len() >= 2 => {
717                let mut args = f.args;
718                // Convert Oracle-style format string to Presto C-style
719                if let Expression::Literal(ref lit) = args[1] {
720                    if let Literal::String(ref s) = lit.as_ref() {
721                    let converted = Self::oracle_to_presto_format(s);
722                    args[1] = Expression::Literal(Box::new(Literal::String(converted)));
723                }
724                }
725                Ok(Expression::Function(Box::new(Function::new(
726                    "DATE_FORMAT".to_string(),
727                    args,
728                ))))
729            }
730
731            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
732            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
733                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
734            ))),
735
736            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
737            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
738
739            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
740            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
741
742            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
743            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
744
745            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
746            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
747                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
748            ))),
749
750            // COLLECT_LIST -> ARRAY_AGG
751            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
752                Function::new("ARRAY_AGG".to_string(), f.args),
753            ))),
754
755            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
756            "COLLECT_SET" if !f.args.is_empty() => {
757                let array_agg =
758                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
759                Ok(Expression::Function(Box::new(Function::new(
760                    "ARRAY_DISTINCT".to_string(),
761                    vec![array_agg],
762                ))))
763            }
764
765            // RLIKE -> REGEXP_LIKE in Presto
766            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
767                "REGEXP_LIKE".to_string(),
768                f.args,
769            )))),
770
771            // REGEXP -> REGEXP_LIKE in Presto
772            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
773                "REGEXP_LIKE".to_string(),
774                f.args,
775            )))),
776
777            // PARSE_JSON -> JSON_PARSE in Presto
778            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
779                "JSON_PARSE".to_string(),
780                f.args,
781            )))),
782
783            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
784            "GET_PATH" if f.args.len() == 2 => {
785                let mut args = f.args;
786                let this = args.remove(0);
787                let path = args.remove(0);
788                let json_path = match &path {
789                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
790                        let Literal::String(s) = lit.as_ref() else { unreachable!() };
791                        let normalized = if s.starts_with('$') {
792                            s.clone()
793                        } else if s.starts_with('[') {
794                            format!("${}", s)
795                        } else {
796                            format!("$.{}", s)
797                        };
798                        Expression::Literal(Box::new(Literal::String(normalized)))
799                    }
800                    _ => path,
801                };
802                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
803                    this,
804                    path: json_path,
805                    returning: None,
806                    arrow_syntax: false,
807                    hash_arrow_syntax: false,
808                    wrapper_option: None,
809                    quotes_option: None,
810                    on_scalar_string: false,
811                    on_error: None,
812                })))
813            }
814
815            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
816            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
817                let mut args = f.args;
818                let subject = args.remove(0);
819                let pattern = args.remove(0);
820                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
821                if args.len() >= 4 {
822                    let _pos = args.remove(0);
823                    let _occ = args.remove(0);
824                    let _params = args.remove(0);
825                    let group = args.remove(0);
826                    Ok(Expression::Function(Box::new(Function::new(
827                        "REGEXP_EXTRACT".to_string(),
828                        vec![subject, pattern, group],
829                    ))))
830                } else {
831                    Ok(Expression::Function(Box::new(Function::new(
832                        "REGEXP_EXTRACT".to_string(),
833                        vec![subject, pattern],
834                    ))))
835                }
836            }
837
838            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
839            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
840            "DATE_PART" if f.args.len() == 2 => {
841                let part_name = match &f.args[0] {
842                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
843                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
844                    _ => None,
845                };
846                match part_name.as_deref() {
847                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
848                        let mut args = f.args;
849                        let value = args.remove(1);
850                        let cast_expr = Expression::Cast(Box::new(Cast {
851                            this: value,
852                            to: DataType::Timestamp {
853                                precision: None,
854                                timezone: false,
855                            },
856                            trailing_comments: Vec::new(),
857                            double_colon_syntax: false,
858                            format: None,
859                            default: None,
860                            inferred_type: None,
861                        }));
862                        Ok(Expression::Function(Box::new(Function::new(
863                            "TO_UNIXTIME".to_string(),
864                            vec![cast_expr],
865                        ))))
866                    }
867                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
868                        let mut args = f.args;
869                        let value = args.remove(1);
870                        let cast_expr = Expression::Cast(Box::new(Cast {
871                            this: value,
872                            to: DataType::Timestamp {
873                                precision: None,
874                                timezone: false,
875                            },
876                            trailing_comments: Vec::new(),
877                            double_colon_syntax: false,
878                            format: None,
879                            default: None,
880                            inferred_type: None,
881                        }));
882                        let unixtime = Expression::Function(Box::new(Function::new(
883                            "TO_UNIXTIME".to_string(),
884                            vec![cast_expr],
885                        )));
886                        Ok(Expression::Mul(Box::new(BinaryOp {
887                            left: unixtime,
888                            right: Expression::Literal(Box::new(Literal::Number("1000".to_string()))),
889                            left_comments: Vec::new(),
890                            operator_comments: Vec::new(),
891                            trailing_comments: Vec::new(),
892                            inferred_type: None,
893                        })))
894                    }
895                    _ => Ok(Expression::Function(Box::new(f))),
896                }
897            }
898
899            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
900            "REPLACE" if f.args.len() == 2 => {
901                let mut args = f.args;
902                args.push(Expression::string(""));
903                Ok(Expression::Function(Box::new(Function::new(
904                    "REPLACE".to_string(),
905                    args,
906                ))))
907            }
908
909            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
910            "REGEXP_REPLACE" if f.args.len() == 2 => {
911                let mut args = f.args;
912                args.push(Expression::string(""));
913                Ok(Expression::Function(Box::new(Function::new(
914                    "REGEXP_REPLACE".to_string(),
915                    args,
916                ))))
917            }
918
919            // Pass through everything else
920            _ => Ok(Expression::Function(Box::new(f))),
921        }
922    }
923
924    fn transform_aggregate_function(
925        &self,
926        f: Box<crate::expressions::AggregateFunction>,
927    ) -> Result<Expression> {
928        let name_upper = f.name.to_uppercase();
929        match name_upper.as_str() {
930            // COUNT_IF -> SUM(CASE WHEN...)
931            "COUNT_IF" if !f.args.is_empty() => {
932                let condition = f.args.into_iter().next().unwrap();
933                let case_expr = Expression::Case(Box::new(Case {
934                    operand: None,
935                    whens: vec![(condition, Expression::number(1))],
936                    else_: Some(Expression::number(0)),
937                    comments: Vec::new(),
938                    inferred_type: None,
939                }));
940                Ok(Expression::Sum(Box::new(AggFunc {
941                    ignore_nulls: None,
942                    having_max: None,
943                    this: case_expr,
944                    distinct: f.distinct,
945                    filter: f.filter,
946                    order_by: Vec::new(),
947                    name: None,
948                    limit: None,
949                    inferred_type: None,
950                })))
951            }
952
953            // ANY_VALUE -> ARBITRARY in Presto
954            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
955                "ARBITRARY".to_string(),
956                f.args,
957            )))),
958
959            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
960            "GROUP_CONCAT" if !f.args.is_empty() => {
961                let mut args = f.args;
962                let first = args.remove(0);
963                let separator = args.pop();
964                let array_agg = Expression::Function(Box::new(Function::new(
965                    "ARRAY_AGG".to_string(),
966                    vec![first],
967                )));
968                let mut join_args = vec![array_agg];
969                if let Some(sep) = separator {
970                    join_args.push(sep);
971                }
972                Ok(Expression::Function(Box::new(Function::new(
973                    "ARRAY_JOIN".to_string(),
974                    join_args,
975                ))))
976            }
977
978            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
979            "STRING_AGG" if !f.args.is_empty() => {
980                let mut args = f.args;
981                let first = args.remove(0);
982                let separator = args.pop();
983                let array_agg = Expression::Function(Box::new(Function::new(
984                    "ARRAY_AGG".to_string(),
985                    vec![first],
986                )));
987                let mut join_args = vec![array_agg];
988                if let Some(sep) = separator {
989                    join_args.push(sep);
990                }
991                Ok(Expression::Function(Box::new(Function::new(
992                    "ARRAY_JOIN".to_string(),
993                    join_args,
994                ))))
995            }
996
997            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
998            "LISTAGG" if !f.args.is_empty() => {
999                let mut args = f.args;
1000                let first = args.remove(0);
1001                let separator = args.pop();
1002                let array_agg = Expression::Function(Box::new(Function::new(
1003                    "ARRAY_AGG".to_string(),
1004                    vec![first],
1005                )));
1006                let mut join_args = vec![array_agg];
1007                if let Some(sep) = separator {
1008                    join_args.push(sep);
1009                }
1010                Ok(Expression::Function(Box::new(Function::new(
1011                    "ARRAY_JOIN".to_string(),
1012                    join_args,
1013                ))))
1014            }
1015
1016            // VAR -> VAR_POP in Presto
1017            "VAR" if !f.args.is_empty() => {
1018                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1019                    name: "VAR_POP".to_string(),
1020                    args: f.args,
1021                    distinct: f.distinct,
1022                    filter: f.filter,
1023                    order_by: Vec::new(),
1024                    limit: None,
1025                    ignore_nulls: None,
1026                    inferred_type: None,
1027                })))
1028            }
1029
1030            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1031            "VARIANCE" if !f.args.is_empty() => {
1032                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1033                    name: "VAR_SAMP".to_string(),
1034                    args: f.args,
1035                    distinct: f.distinct,
1036                    filter: f.filter,
1037                    order_by: Vec::new(),
1038                    limit: None,
1039                    ignore_nulls: None,
1040                    inferred_type: None,
1041                })))
1042            }
1043
1044            // Pass through everything else
1045            _ => Ok(Expression::AggregateFunction(f)),
1046        }
1047    }
1048
1049    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1050        // Presto type mappings are handled in the generator
1051        Ok(Expression::Cast(Box::new(c)))
1052    }
1053}