Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12#[cfg(feature = "generate")]
13use crate::generator::GeneratorConfig;
14use crate::tokens::TokenizerConfig;
15
/// Marker type for the Presto SQL dialect.
///
/// The type carries no state; all dialect behaviour lives in its trait and
/// inherent `impl` blocks.
pub struct PrestoDialect;
18
19impl DialectImpl for PrestoDialect {
20    fn dialect_type(&self) -> DialectType {
21        DialectType::Presto
22    }
23
24    fn tokenizer_config(&self) -> TokenizerConfig {
25        let mut config = TokenizerConfig::default();
26        // Presto uses double quotes for identifiers
27        config.identifiers.insert('"', '"');
28        // Presto does NOT support nested comments
29        config.nested_comments = false;
30        // Presto does NOT support QUALIFY - it's a valid identifier
31        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
32        config.keywords.remove("QUALIFY");
33        config
34    }
35
36    #[cfg(feature = "generate")]
37
38    fn generator_config(&self) -> GeneratorConfig {
39        use crate::generator::IdentifierQuoteStyle;
40        GeneratorConfig {
41            identifier_quote: '"',
42            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
43            dialect: Some(DialectType::Presto),
44            limit_only_literals: true,
45            tz_to_with_time_zone: true,
46            ..Default::default()
47        }
48    }
49
50    #[cfg(feature = "transpile")]
51
52    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
53        match expr {
54            // IFNULL -> COALESCE in Presto
55            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
56                original_name: None,
57                expressions: vec![f.this, f.expression],
58                inferred_type: None,
59            }))),
60
61            // NVL -> COALESCE in Presto
62            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
63                original_name: None,
64                expressions: vec![f.this, f.expression],
65                inferred_type: None,
66            }))),
67
68            // TryCast stays as TryCast (Presto supports TRY_CAST)
69            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
70
71            // SafeCast -> TRY_CAST in Presto
72            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
73
74            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
75            Expression::ILike(op) => {
76                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
77                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
78                Ok(Expression::Like(Box::new(LikeOp {
79                    left: lower_left,
80                    right: lower_right,
81                    escape: op.escape,
82                    quantifier: op.quantifier.clone(),
83                    inferred_type: None,
84                })))
85            }
86
87            // CountIf is native in Presto (keep as-is)
88            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
89
90            // EXPLODE -> UNNEST in Presto
91            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
92                crate::expressions::UnnestFunc {
93                    this: f.this,
94                    expressions: Vec::new(),
95                    with_ordinality: false,
96                    alias: None,
97                    offset_alias: None,
98                },
99            ))),
100
101            // ExplodeOuter -> UNNEST in Presto
102            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
103                crate::expressions::UnnestFunc {
104                    this: f.this,
105                    expressions: Vec::new(),
106                    with_ordinality: false,
107                    alias: None,
108                    offset_alias: None,
109                },
110            ))),
111
112            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
113            Expression::StringAgg(f) => {
114                let array_agg = Expression::Function(Box::new(Function::new(
115                    "ARRAY_AGG".to_string(),
116                    vec![f.this.clone()],
117                )));
118                let mut join_args = vec![array_agg];
119                if let Some(sep) = f.separator {
120                    join_args.push(sep);
121                }
122                Ok(Expression::Function(Box::new(Function::new(
123                    "ARRAY_JOIN".to_string(),
124                    join_args,
125                ))))
126            }
127
128            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
129            Expression::GroupConcat(f) => {
130                let array_agg = Expression::Function(Box::new(Function::new(
131                    "ARRAY_AGG".to_string(),
132                    vec![f.this.clone()],
133                )));
134                let mut join_args = vec![array_agg];
135                if let Some(sep) = f.separator {
136                    join_args.push(sep);
137                }
138                Ok(Expression::Function(Box::new(Function::new(
139                    "ARRAY_JOIN".to_string(),
140                    join_args,
141                ))))
142            }
143
144            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
145            Expression::ListAgg(f) => {
146                let array_agg = Expression::Function(Box::new(Function::new(
147                    "ARRAY_AGG".to_string(),
148                    vec![f.this.clone()],
149                )));
150                let mut join_args = vec![array_agg];
151                if let Some(sep) = f.separator {
152                    join_args.push(sep);
153                }
154                Ok(Expression::Function(Box::new(Function::new(
155                    "ARRAY_JOIN".to_string(),
156                    join_args,
157                ))))
158            }
159
160            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
161
162            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
163            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
164                let path = match *e.expression {
165                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
166                        let Literal::String(s) = lit.as_ref() else {
167                            unreachable!()
168                        };
169                        let normalized = if s.starts_with('$') {
170                            s.clone()
171                        } else if s.starts_with('[') {
172                            format!("${}", s)
173                        } else {
174                            format!("$.{}", s)
175                        };
176                        Expression::Literal(Box::new(Literal::String(normalized)))
177                    }
178                    other => other,
179                };
180                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
181                    this: *e.this,
182                    path,
183                    returning: None,
184                    arrow_syntax: false,
185                    hash_arrow_syntax: false,
186                    wrapper_option: None,
187                    quotes_option: None,
188                    on_scalar_string: false,
189                    on_error: None,
190                })))
191            }
192
193            // Generic function transformations
194            Expression::Function(f) => self.transform_function(*f),
195
196            // Generic aggregate function transformations
197            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
198
199            // Cast transformations
200            Expression::Cast(c) => self.transform_cast(*c),
201
202            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
203            // to ensure float division (only when left isn't already a float cast)
204            Expression::Div(mut op) => {
205                if !Self::is_float_cast(&op.left) {
206                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
207                        this: op.left,
208                        to: DataType::Double {
209                            precision: None,
210                            scale: None,
211                        },
212                        trailing_comments: Vec::new(),
213                        double_colon_syntax: false,
214                        format: None,
215                        default: None,
216                        inferred_type: None,
217                    }));
218                }
219                Ok(Expression::Div(op))
220            }
221
222            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
223            Expression::IntDiv(f) => {
224                let cast_x = Expression::Cast(Box::new(Cast {
225                    this: f.this,
226                    to: crate::expressions::DataType::Double {
227                        precision: None,
228                        scale: None,
229                    },
230                    trailing_comments: Vec::new(),
231                    double_colon_syntax: false,
232                    format: None,
233                    default: None,
234                    inferred_type: None,
235                }));
236                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
237                Ok(Expression::Cast(Box::new(Cast {
238                    this: div_expr,
239                    to: crate::expressions::DataType::Int {
240                        length: None,
241                        integer_spelling: true,
242                    },
243                    trailing_comments: Vec::new(),
244                    double_colon_syntax: false,
245                    format: None,
246                    default: None,
247                    inferred_type: None,
248                })))
249            }
250
251            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
252            Expression::Delete(mut d) => {
253                if d.alias.is_some() {
254                    d.alias = None;
255                    d.alias_explicit_as = false;
256                    // Unqualify all columns in the WHERE clause
257                    if let Some(ref mut where_clause) = d.where_clause {
258                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
259                    }
260                }
261                Ok(Expression::Delete(d))
262            }
263
264            // Pass through everything else
265            _ => Ok(expr),
266        }
267    }
268}
269
270#[cfg(feature = "transpile")]
271impl PrestoDialect {
272    /// Recursively unqualify columns - remove table qualifiers from Column references
273    fn unqualify_columns(expr: Expression) -> Expression {
274        match expr {
275            Expression::Column(c) => {
276                if c.table.is_some() {
277                    Expression::boxed_column(Column {
278                        name: c.name,
279                        table: None,
280                        join_mark: c.join_mark,
281                        trailing_comments: c.trailing_comments,
282                        span: None,
283                        inferred_type: None,
284                    })
285                } else {
286                    Expression::Column(c)
287                }
288            }
289            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
290            Expression::Dot(d) => Expression::boxed_column(Column {
291                name: d.field,
292                table: None,
293                join_mark: false,
294                trailing_comments: Vec::new(),
295                span: None,
296                inferred_type: None,
297            }),
298            // Recursively walk common binary expression types
299            Expression::And(mut op) => {
300                op.left = Self::unqualify_columns(op.left);
301                op.right = Self::unqualify_columns(op.right);
302                Expression::And(op)
303            }
304            Expression::Or(mut op) => {
305                op.left = Self::unqualify_columns(op.left);
306                op.right = Self::unqualify_columns(op.right);
307                Expression::Or(op)
308            }
309            Expression::Eq(mut op) => {
310                op.left = Self::unqualify_columns(op.left);
311                op.right = Self::unqualify_columns(op.right);
312                Expression::Eq(op)
313            }
314            Expression::Neq(mut op) => {
315                op.left = Self::unqualify_columns(op.left);
316                op.right = Self::unqualify_columns(op.right);
317                Expression::Neq(op)
318            }
319            Expression::Gt(mut op) => {
320                op.left = Self::unqualify_columns(op.left);
321                op.right = Self::unqualify_columns(op.right);
322                Expression::Gt(op)
323            }
324            Expression::Lt(mut op) => {
325                op.left = Self::unqualify_columns(op.left);
326                op.right = Self::unqualify_columns(op.right);
327                Expression::Lt(op)
328            }
329            Expression::Gte(mut op) => {
330                op.left = Self::unqualify_columns(op.left);
331                op.right = Self::unqualify_columns(op.right);
332                Expression::Gte(op)
333            }
334            Expression::Lte(mut op) => {
335                op.left = Self::unqualify_columns(op.left);
336                op.right = Self::unqualify_columns(op.right);
337                Expression::Lte(op)
338            }
339            // Unary operators
340            Expression::Not(mut e) => {
341                e.this = Self::unqualify_columns(e.this);
342                Expression::Not(e)
343            }
344            // Predicates
345            Expression::In(mut i) => {
346                i.this = Self::unqualify_columns(i.this);
347                i.expressions = i
348                    .expressions
349                    .into_iter()
350                    .map(Self::unqualify_columns)
351                    .collect();
352                // Also recurse into subquery if present
353                if let Some(q) = i.query {
354                    i.query = Some(Self::unqualify_columns(q));
355                }
356                Expression::In(i)
357            }
358            Expression::IsNull(mut f) => {
359                f.this = Self::unqualify_columns(f.this);
360                Expression::IsNull(f)
361            }
362            Expression::Paren(mut p) => {
363                p.this = Self::unqualify_columns(p.this);
364                Expression::Paren(p)
365            }
366            Expression::Function(mut f) => {
367                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
368                Expression::Function(f)
369            }
370            // For subqueries (SELECT statements inside IN, etc), also unqualify
371            Expression::Select(mut s) => {
372                s.expressions = s
373                    .expressions
374                    .into_iter()
375                    .map(Self::unqualify_columns)
376                    .collect();
377                if let Some(ref mut w) = s.where_clause {
378                    w.this = Self::unqualify_columns(w.this.clone());
379                }
380                Expression::Select(s)
381            }
382            Expression::Subquery(mut sq) => {
383                sq.this = Self::unqualify_columns(sq.this);
384                Expression::Subquery(sq)
385            }
386            Expression::Alias(mut a) => {
387                a.this = Self::unqualify_columns(a.this);
388                Expression::Alias(a)
389            }
390            // Pass through other expressions unchanged
391            other => other,
392        }
393    }
394
395    /// Check if an expression is already a CAST to a float type
396    fn is_float_cast(expr: &Expression) -> bool {
397        if let Expression::Cast(cast) = expr {
398            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
399        } else {
400            false
401        }
402    }
403
404    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
405    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
406    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
407    pub fn oracle_to_presto_format(fmt: &str) -> String {
408        // Process character by character to avoid double-replacement issues
409        let chars: Vec<char> = fmt.chars().collect();
410        let mut result = String::new();
411        let mut i = 0;
412        while i < chars.len() {
413            let remaining = &fmt[i..];
414            if remaining.starts_with("yyyy") {
415                result.push_str("%Y");
416                i += 4;
417            } else if remaining.starts_with("yy") {
418                result.push_str("%y");
419                i += 2;
420            } else if remaining.starts_with("hh24") {
421                result.push_str("%H");
422                i += 4;
423            } else if remaining.starts_with("hh") {
424                result.push_str("%H");
425                i += 2;
426            } else if remaining.starts_with("mi") {
427                result.push_str("%i");
428                i += 2;
429            } else if remaining.starts_with("mm") {
430                result.push_str("%m");
431                i += 2;
432            } else if remaining.starts_with("dd") {
433                result.push_str("%d");
434                i += 2;
435            } else if remaining.starts_with("ss") {
436                result.push_str("%s");
437                i += 2;
438            } else {
439                result.push(chars[i]);
440                i += 1;
441            }
442        }
443        result
444    }
445
/// Convert Presto's C-style date format to a Java-style pattern (for Hive/Spark).
///
/// Specifiers are substituted one after another in this order:
/// `%Y`→`yyyy`, `%m`→`MM`, `%d`→`dd`, `%H`→`HH`, `%i`→`mm`, `%S`→`ss`,
/// `%s`→`ss`, `%y`→`yy`, `%T`→`HH:mm:ss`, `%F`→`yyyy-MM-dd`, `%M`→`MMMM`.
/// All other text is left untouched.
pub fn presto_to_java_format(fmt: &str) -> String {
    const SPECIFIERS: [(&str, &str); 11] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%i", "mm"),
        ("%S", "ss"),
        ("%s", "ss"),
        ("%y", "yy"),
        ("%T", "HH:mm:ss"),
        ("%F", "yyyy-MM-dd"),
        ("%M", "MMMM"),
    ];

    SPECIFIERS
        .iter()
        .fold(fmt.to_owned(), |pattern, &(presto, java)| {
            pattern.replace(presto, java)
        })
}
462
/// Normalize a Presto format string by collapsing a spelled-out time of day
/// into `%T`.
///
/// Both `%H:%i:%S` and `%H:%i:%s` are rewritten to `%T`; the rest of the
/// string, including any date portion, is returned unchanged.
pub fn normalize_presto_format(fmt: &str) -> String {
    ["%H:%i:%S", "%H:%i:%s"]
        .iter()
        .fold(fmt.to_owned(), |normalized, spelled_out| {
            normalized.replace(*spelled_out, "%T")
        })
}
467
/// Convert Presto's C-style format to DuckDB's C-style format.
///
/// Three rewrites are applied in order: `%i` becomes `%M` (minutes), `%s`
/// becomes `%S` (seconds), and `%T` is expanded to `%H:%M:%S`.
pub fn presto_to_duckdb_format(fmt: &str) -> String {
    const REWRITES: [(&str, &str); 3] = [("%i", "%M"), ("%s", "%S"), ("%T", "%H:%M:%S")];

    let mut converted = fmt.to_owned();
    for (presto, duckdb) in REWRITES {
        converted = converted.replace(presto, duckdb);
    }
    converted
}
474
/// Convert Presto's C-style format to BigQuery's format.
///
/// Composite patterns are shortened first (`%Y-%m-%d` to `%F`, `%H:%i:%S` and
/// `%H:%i:%s` to `%T`); afterwards any remaining `%i` becomes `%M` (BigQuery's
/// minutes specifier) and any remaining `%s` becomes `%S`.
pub fn presto_to_bigquery_format(fmt: &str) -> String {
    // Order matters: the composite patterns must be matched before their
    // individual `%i` / `%s` pieces are rewritten.
    const REWRITES: [(&str, &str); 5] = [
        ("%Y-%m-%d", "%F"),
        ("%H:%i:%S", "%T"),
        ("%H:%i:%s", "%T"),
        ("%i", "%M"),
        ("%s", "%S"),
    ];

    REWRITES
        .iter()
        .fold(fmt.to_owned(), |converted, &(presto, bigquery)| {
            converted.replace(presto, bigquery)
        })
}
487
/// Check if a Presto format string matches the default timestamp format.
///
/// The default is the date `%Y-%m-%d`, a single space, and a time of day
/// written as `%T`, `%H:%i:%S` or `%H:%i:%s`. These three spellings are exactly
/// the inputs that collapse to `%Y-%m-%d %T` once the spelled-out time is
/// normalized to `%T`, so they are compared directly without building an
/// intermediate string.
pub fn is_default_timestamp_format(fmt: &str) -> bool {
    matches!(
        fmt,
        "%Y-%m-%d %T" | "%Y-%m-%d %H:%i:%S" | "%Y-%m-%d %H:%i:%s"
    )
}
496
/// Check if a Presto format string matches the default date format, written
/// either as `%Y-%m-%d` or as its shorthand `%F`.
pub fn is_default_date_format(fmt: &str) -> bool {
    matches!(fmt, "%Y-%m-%d" | "%F")
}
501
502    fn transform_function(&self, f: Function) -> Result<Expression> {
503        let name_upper = f.name.to_uppercase();
504        match name_upper.as_str() {
505            // IFNULL -> COALESCE
506            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
507                original_name: None,
508                expressions: f.args,
509                inferred_type: None,
510            }))),
511
512            // NVL -> COALESCE
513            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
514                original_name: None,
515                expressions: f.args,
516                inferred_type: None,
517            }))),
518
519            // ISNULL -> COALESCE
520            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
521                original_name: None,
522                expressions: f.args,
523                inferred_type: None,
524            }))),
525
526            // GETDATE -> CURRENT_TIMESTAMP
527            "GETDATE" => Ok(Expression::CurrentTimestamp(
528                crate::expressions::CurrentTimestamp {
529                    precision: None,
530                    sysdate: false,
531                },
532            )),
533
534            // NOW -> CURRENT_TIMESTAMP
535            "NOW" => Ok(Expression::CurrentTimestamp(
536                crate::expressions::CurrentTimestamp {
537                    precision: None,
538                    sysdate: false,
539                },
540            )),
541
542            // RAND -> RANDOM in Presto (but it's actually RANDOM())
543            "RAND" => Ok(Expression::Function(Box::new(Function::new(
544                "RANDOM".to_string(),
545                vec![],
546            )))),
547
548            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
549            "GROUP_CONCAT" if !f.args.is_empty() => {
550                let mut args = f.args;
551                let first = args.remove(0);
552                let separator = args.pop();
553                let array_agg = Expression::Function(Box::new(Function::new(
554                    "ARRAY_AGG".to_string(),
555                    vec![first],
556                )));
557                let mut join_args = vec![array_agg];
558                if let Some(sep) = separator {
559                    join_args.push(sep);
560                }
561                Ok(Expression::Function(Box::new(Function::new(
562                    "ARRAY_JOIN".to_string(),
563                    join_args,
564                ))))
565            }
566
567            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
568            "STRING_AGG" if !f.args.is_empty() => {
569                let mut args = f.args;
570                let first = args.remove(0);
571                let separator = args.pop();
572                let array_agg = Expression::Function(Box::new(Function::new(
573                    "ARRAY_AGG".to_string(),
574                    vec![first],
575                )));
576                let mut join_args = vec![array_agg];
577                if let Some(sep) = separator {
578                    join_args.push(sep);
579                }
580                Ok(Expression::Function(Box::new(Function::new(
581                    "ARRAY_JOIN".to_string(),
582                    join_args,
583                ))))
584            }
585
586            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
587            "LISTAGG" if !f.args.is_empty() => {
588                let mut args = f.args;
589                let first = args.remove(0);
590                let separator = args.pop();
591                let array_agg = Expression::Function(Box::new(Function::new(
592                    "ARRAY_AGG".to_string(),
593                    vec![first],
594                )));
595                let mut join_args = vec![array_agg];
596                if let Some(sep) = separator {
597                    join_args.push(sep);
598                }
599                Ok(Expression::Function(Box::new(Function::new(
600                    "ARRAY_JOIN".to_string(),
601                    join_args,
602                ))))
603            }
604
605            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
606            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
607
608            // LEN -> LENGTH
609            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
610                f.args.into_iter().next().unwrap(),
611            )))),
612
613            // CHARINDEX -> STRPOS in Presto (with swapped args)
614            "CHARINDEX" if f.args.len() >= 2 => {
615                let mut args = f.args;
616                let substring = args.remove(0);
617                let string = args.remove(0);
618                // STRPOS(string, substring) - note: argument order is reversed
619                Ok(Expression::Function(Box::new(Function::new(
620                    "STRPOS".to_string(),
621                    vec![string, substring],
622                ))))
623            }
624
625            // INSTR -> STRPOS (with same argument order)
626            "INSTR" if f.args.len() >= 2 => {
627                let args = f.args;
628                // INSTR(string, substring) -> STRPOS(string, substring)
629                Ok(Expression::Function(Box::new(Function::new(
630                    "STRPOS".to_string(),
631                    args,
632                ))))
633            }
634
635            // LOCATE -> STRPOS in Presto (with swapped args)
636            "LOCATE" if f.args.len() >= 2 => {
637                let mut args = f.args;
638                let substring = args.remove(0);
639                let string = args.remove(0);
640                // LOCATE(substring, string) -> STRPOS(string, substring)
641                Ok(Expression::Function(Box::new(Function::new(
642                    "STRPOS".to_string(),
643                    vec![string, substring],
644                ))))
645            }
646
647            // ARRAY_LENGTH -> CARDINALITY in Presto
648            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
649                Function::new("CARDINALITY".to_string(), f.args),
650            ))),
651
652            // SIZE -> CARDINALITY in Presto
653            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
654                "CARDINALITY".to_string(),
655                f.args,
656            )))),
657
658            // ARRAY_CONTAINS -> CONTAINS in Presto
659            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
660                Function::new("CONTAINS".to_string(), f.args),
661            ))),
662
663            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
664            "TO_DATE" if !f.args.is_empty() => {
665                if f.args.len() == 1 {
666                    // Simple case: just cast to DATE
667                    Ok(Expression::Cast(Box::new(Cast {
668                        this: f.args.into_iter().next().unwrap(),
669                        to: DataType::Date,
670                        trailing_comments: Vec::new(),
671                        double_colon_syntax: false,
672                        format: None,
673                        default: None,
674                        inferred_type: None,
675                    })))
676                } else {
677                    // With format: use DATE_PARSE
678                    Ok(Expression::Function(Box::new(Function::new(
679                        "DATE_PARSE".to_string(),
680                        f.args,
681                    ))))
682                }
683            }
684
685            // TO_TIMESTAMP -> DATE_PARSE / CAST
686            "TO_TIMESTAMP" if !f.args.is_empty() => {
687                if f.args.len() == 1 {
688                    Ok(Expression::Cast(Box::new(Cast {
689                        this: f.args.into_iter().next().unwrap(),
690                        to: DataType::Timestamp {
691                            precision: None,
692                            timezone: false,
693                        },
694                        trailing_comments: Vec::new(),
695                        double_colon_syntax: false,
696                        format: None,
697                        default: None,
698                        inferred_type: None,
699                    })))
700                } else {
701                    Ok(Expression::Function(Box::new(Function::new(
702                        "DATE_PARSE".to_string(),
703                        f.args,
704                    ))))
705                }
706            }
707
708            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
709            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
710
711            // strftime -> DATE_FORMAT in Presto
712            "STRFTIME" if f.args.len() >= 2 => {
713                let mut args = f.args;
714                // strftime(format, date) -> DATE_FORMAT(date, format)
715                let format = args.remove(0);
716                let date = args.remove(0);
717                Ok(Expression::Function(Box::new(Function::new(
718                    "DATE_FORMAT".to_string(),
719                    vec![date, format],
720                ))))
721            }
722
723            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
724            "TO_CHAR" if f.args.len() >= 2 => {
725                let mut args = f.args;
726                // Convert Oracle-style format string to Presto C-style
727                if let Expression::Literal(ref lit) = args[1] {
728                    if let Literal::String(ref s) = lit.as_ref() {
729                        let converted = Self::oracle_to_presto_format(s);
730                        args[1] = Expression::Literal(Box::new(Literal::String(converted)));
731                    }
732                }
733                Ok(Expression::Function(Box::new(Function::new(
734                    "DATE_FORMAT".to_string(),
735                    args,
736                ))))
737            }
738
739            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
740            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
741                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
742            ))),
743
744            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
745            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
746
747            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
748            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
749
750            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
751            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
752
753            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
754            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
755                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
756            ))),
757
758            // COLLECT_LIST -> ARRAY_AGG
759            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
760                Function::new("ARRAY_AGG".to_string(), f.args),
761            ))),
762
763            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
764            "COLLECT_SET" if !f.args.is_empty() => {
765                let array_agg =
766                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
767                Ok(Expression::Function(Box::new(Function::new(
768                    "ARRAY_DISTINCT".to_string(),
769                    vec![array_agg],
770                ))))
771            }
772
773            // RLIKE -> REGEXP_LIKE in Presto
774            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
775                "REGEXP_LIKE".to_string(),
776                f.args,
777            )))),
778
779            // REGEXP -> REGEXP_LIKE in Presto
780            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
781                "REGEXP_LIKE".to_string(),
782                f.args,
783            )))),
784
785            // PARSE_JSON -> JSON_PARSE in Presto
786            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
787                "JSON_PARSE".to_string(),
788                f.args,
789            )))),
790
791            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
792            "GET_PATH" if f.args.len() == 2 => {
793                let mut args = f.args;
794                let this = args.remove(0);
795                let path = args.remove(0);
796                let json_path = match &path {
797                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
798                        let Literal::String(s) = lit.as_ref() else {
799                            unreachable!()
800                        };
801                        let normalized = if s.starts_with('$') {
802                            s.clone()
803                        } else if s.starts_with('[') {
804                            format!("${}", s)
805                        } else {
806                            format!("$.{}", s)
807                        };
808                        Expression::Literal(Box::new(Literal::String(normalized)))
809                    }
810                    _ => path,
811                };
812                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
813                    this,
814                    path: json_path,
815                    returning: None,
816                    arrow_syntax: false,
817                    hash_arrow_syntax: false,
818                    wrapper_option: None,
819                    quotes_option: None,
820                    on_scalar_string: false,
821                    on_error: None,
822                })))
823            }
824
825            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
826            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
827                let mut args = f.args;
828                let subject = args.remove(0);
829                let pattern = args.remove(0);
830                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
831                if args.len() >= 4 {
832                    let _pos = args.remove(0);
833                    let _occ = args.remove(0);
834                    let _params = args.remove(0);
835                    let group = args.remove(0);
836                    Ok(Expression::Function(Box::new(Function::new(
837                        "REGEXP_EXTRACT".to_string(),
838                        vec![subject, pattern, group],
839                    ))))
840                } else {
841                    Ok(Expression::Function(Box::new(Function::new(
842                        "REGEXP_EXTRACT".to_string(),
843                        vec![subject, pattern],
844                    ))))
845                }
846            }
847
848            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
849            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
850            "DATE_PART" if f.args.len() == 2 => {
851                let part_name = match &f.args[0] {
852                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
853                    Expression::Var(v) => Some(v.this.to_uppercase()),
854                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
855                    _ => None,
856                };
857                match part_name.as_deref() {
858                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
859                        let mut args = f.args;
860                        let value = args.remove(1);
861                        let cast_expr = Expression::Cast(Box::new(Cast {
862                            this: value,
863                            to: DataType::Timestamp {
864                                precision: None,
865                                timezone: false,
866                            },
867                            trailing_comments: Vec::new(),
868                            double_colon_syntax: false,
869                            format: None,
870                            default: None,
871                            inferred_type: None,
872                        }));
873                        Ok(Expression::Function(Box::new(Function::new(
874                            "TO_UNIXTIME".to_string(),
875                            vec![cast_expr],
876                        ))))
877                    }
878                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
879                        let mut args = f.args;
880                        let value = args.remove(1);
881                        let cast_expr = Expression::Cast(Box::new(Cast {
882                            this: value,
883                            to: DataType::Timestamp {
884                                precision: None,
885                                timezone: false,
886                            },
887                            trailing_comments: Vec::new(),
888                            double_colon_syntax: false,
889                            format: None,
890                            default: None,
891                            inferred_type: None,
892                        }));
893                        let unixtime = Expression::Function(Box::new(Function::new(
894                            "TO_UNIXTIME".to_string(),
895                            vec![cast_expr],
896                        )));
897                        Ok(Expression::Mul(Box::new(BinaryOp {
898                            left: unixtime,
899                            right: Expression::Literal(Box::new(Literal::Number(
900                                "1000".to_string(),
901                            ))),
902                            left_comments: Vec::new(),
903                            operator_comments: Vec::new(),
904                            trailing_comments: Vec::new(),
905                            inferred_type: None,
906                        })))
907                    }
908                    _ => Ok(Expression::Function(Box::new(f))),
909                }
910            }
911
912            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
913            "REPLACE" if f.args.len() == 2 => {
914                let mut args = f.args;
915                args.push(Expression::string(""));
916                Ok(Expression::Function(Box::new(Function::new(
917                    "REPLACE".to_string(),
918                    args,
919                ))))
920            }
921
922            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
923            "REGEXP_REPLACE" if f.args.len() == 2 => {
924                let mut args = f.args;
925                args.push(Expression::string(""));
926                Ok(Expression::Function(Box::new(Function::new(
927                    "REGEXP_REPLACE".to_string(),
928                    args,
929                ))))
930            }
931
932            // Pass through everything else
933            _ => Ok(Expression::Function(Box::new(f))),
934        }
935    }
936
937    fn transform_aggregate_function(
938        &self,
939        f: Box<crate::expressions::AggregateFunction>,
940    ) -> Result<Expression> {
941        let name_upper = f.name.to_uppercase();
942        match name_upper.as_str() {
943            // COUNT_IF -> SUM(CASE WHEN...)
944            "COUNT_IF" if !f.args.is_empty() => {
945                let condition = f.args.into_iter().next().unwrap();
946                let case_expr = Expression::Case(Box::new(Case {
947                    operand: None,
948                    whens: vec![(condition, Expression::number(1))],
949                    else_: Some(Expression::number(0)),
950                    comments: Vec::new(),
951                    inferred_type: None,
952                }));
953                Ok(Expression::Sum(Box::new(AggFunc {
954                    ignore_nulls: None,
955                    having_max: None,
956                    this: case_expr,
957                    distinct: f.distinct,
958                    filter: f.filter,
959                    order_by: Vec::new(),
960                    name: None,
961                    limit: None,
962                    inferred_type: None,
963                })))
964            }
965
966            // ANY_VALUE -> ARBITRARY in Presto
967            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
968                "ARBITRARY".to_string(),
969                f.args,
970            )))),
971
972            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
973            "GROUP_CONCAT" if !f.args.is_empty() => {
974                let mut args = f.args;
975                let first = args.remove(0);
976                let separator = args.pop();
977                let array_agg = Expression::Function(Box::new(Function::new(
978                    "ARRAY_AGG".to_string(),
979                    vec![first],
980                )));
981                let mut join_args = vec![array_agg];
982                if let Some(sep) = separator {
983                    join_args.push(sep);
984                }
985                Ok(Expression::Function(Box::new(Function::new(
986                    "ARRAY_JOIN".to_string(),
987                    join_args,
988                ))))
989            }
990
991            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
992            "STRING_AGG" if !f.args.is_empty() => {
993                let mut args = f.args;
994                let first = args.remove(0);
995                let separator = args.pop();
996                let array_agg = Expression::Function(Box::new(Function::new(
997                    "ARRAY_AGG".to_string(),
998                    vec![first],
999                )));
1000                let mut join_args = vec![array_agg];
1001                if let Some(sep) = separator {
1002                    join_args.push(sep);
1003                }
1004                Ok(Expression::Function(Box::new(Function::new(
1005                    "ARRAY_JOIN".to_string(),
1006                    join_args,
1007                ))))
1008            }
1009
1010            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
1011            "LISTAGG" if !f.args.is_empty() => {
1012                let mut args = f.args;
1013                let first = args.remove(0);
1014                let separator = args.pop();
1015                let array_agg = Expression::Function(Box::new(Function::new(
1016                    "ARRAY_AGG".to_string(),
1017                    vec![first],
1018                )));
1019                let mut join_args = vec![array_agg];
1020                if let Some(sep) = separator {
1021                    join_args.push(sep);
1022                }
1023                Ok(Expression::Function(Box::new(Function::new(
1024                    "ARRAY_JOIN".to_string(),
1025                    join_args,
1026                ))))
1027            }
1028
1029            // VAR -> VAR_POP in Presto
1030            "VAR" if !f.args.is_empty() => {
1031                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1032                    name: "VAR_POP".to_string(),
1033                    args: f.args,
1034                    distinct: f.distinct,
1035                    filter: f.filter,
1036                    order_by: Vec::new(),
1037                    limit: None,
1038                    ignore_nulls: None,
1039                    inferred_type: None,
1040                })))
1041            }
1042
1043            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1044            "VARIANCE" if !f.args.is_empty() => {
1045                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1046                    name: "VAR_SAMP".to_string(),
1047                    args: f.args,
1048                    distinct: f.distinct,
1049                    filter: f.filter,
1050                    order_by: Vec::new(),
1051                    limit: None,
1052                    ignore_nulls: None,
1053                    inferred_type: None,
1054                })))
1055            }
1056
1057            // Pass through everything else
1058            _ => Ok(Expression::AggregateFunction(f)),
1059        }
1060    }
1061
1062    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1063        // Presto type mappings are handled in the generator
1064        Ok(Expression::Cast(Box::new(c)))
1065    }
1066}