Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
//! Presto is the base for the Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function, JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
/// Presto dialect.
///
/// Stateless marker type; the Presto-specific tokenizer settings, generator
/// settings and expression rewrites are supplied by its `DialectImpl`
/// implementation and its inherent helper methods.
pub struct PrestoDialect;
16
17impl DialectImpl for PrestoDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Presto
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        let mut config = TokenizerConfig::default();
24        // Presto uses double quotes for identifiers
25        config.identifiers.insert('"', '"');
26        // Presto does NOT support nested comments
27        config.nested_comments = false;
28        // Presto does NOT support QUALIFY - it's a valid identifier
29        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
30        config.keywords.remove("QUALIFY");
31        config
32    }
33
34    fn generator_config(&self) -> GeneratorConfig {
35        use crate::generator::IdentifierQuoteStyle;
36        GeneratorConfig {
37            identifier_quote: '"',
38            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
39            dialect: Some(DialectType::Presto),
40            limit_only_literals: true,
41            tz_to_with_time_zone: true,
42            ..Default::default()
43        }
44    }
45
46    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
47        match expr {
48            // IFNULL -> COALESCE in Presto
49            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
50                expressions: vec![f.this, f.expression],
51            }))),
52
53            // NVL -> COALESCE in Presto
54            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
55                expressions: vec![f.this, f.expression],
56            }))),
57
58            // TryCast stays as TryCast (Presto supports TRY_CAST)
59            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
60
61            // SafeCast -> TRY_CAST in Presto
62            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
63
64            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
65            Expression::ILike(op) => {
66                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
67                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
68                Ok(Expression::Like(Box::new(LikeOp {
69                    left: lower_left,
70                    right: lower_right,
71                    escape: op.escape,
72                    quantifier: op.quantifier.clone(),
73                })))
74            }
75
76            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
77            Expression::CountIf(f) => {
78                let case_expr = Expression::Case(Box::new(Case {
79                    operand: None,
80                    whens: vec![(f.this.clone(), Expression::number(1))],
81                    else_: Some(Expression::number(0)),
82                }));
83                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
84                    this: case_expr,
85                    distinct: f.distinct,
86                    filter: f.filter,
87                    order_by: Vec::new(),
88                name: None,
89                limit: None,
90                })))
91            }
92
93            // EXPLODE -> UNNEST in Presto
94            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
95                crate::expressions::UnnestFunc {
96                    this: f.this,
97                    expressions: Vec::new(),
98                    with_ordinality: false,
99                    alias: None,
100                    offset_alias: None,
101                },
102            ))),
103
104            // ExplodeOuter -> UNNEST in Presto
105            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
106                crate::expressions::UnnestFunc {
107                    this: f.this,
108                    expressions: Vec::new(),
109                    with_ordinality: false,
110                    alias: None,
111                    offset_alias: None,
112                },
113            ))),
114
115            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
116            Expression::StringAgg(f) => {
117                let array_agg = Expression::Function(Box::new(Function::new(
118                    "ARRAY_AGG".to_string(),
119                    vec![f.this.clone()],
120                )));
121                let mut join_args = vec![array_agg];
122                if let Some(sep) = f.separator {
123                    join_args.push(sep);
124                }
125                Ok(Expression::Function(Box::new(Function::new(
126                    "ARRAY_JOIN".to_string(),
127                    join_args,
128                ))))
129            }
130
131            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
132            Expression::GroupConcat(f) => {
133                let array_agg = Expression::Function(Box::new(Function::new(
134                    "ARRAY_AGG".to_string(),
135                    vec![f.this.clone()],
136                )));
137                let mut join_args = vec![array_agg];
138                if let Some(sep) = f.separator {
139                    join_args.push(sep);
140                }
141                Ok(Expression::Function(Box::new(Function::new(
142                    "ARRAY_JOIN".to_string(),
143                    join_args,
144                ))))
145            }
146
147            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
148            Expression::ListAgg(f) => {
149                let array_agg = Expression::Function(Box::new(Function::new(
150                    "ARRAY_AGG".to_string(),
151                    vec![f.this.clone()],
152                )));
153                let mut join_args = vec![array_agg];
154                if let Some(sep) = f.separator {
155                    join_args.push(sep);
156                }
157                Ok(Expression::Function(Box::new(Function::new(
158                    "ARRAY_JOIN".to_string(),
159                    join_args,
160                ))))
161            }
162
163            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
164
165            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
166            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
167                let path = match *e.expression {
168                    Expression::Literal(Literal::String(s)) => {
169                        let normalized = if s.starts_with('$') { s } else if s.starts_with('[') { format!("${}", s) } else { format!("$.{}", s) };
170                        Expression::Literal(Literal::String(normalized))
171                    }
172                    other => other,
173                };
174                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
175                    this: *e.this,
176                    path,
177                    returning: None,
178                    arrow_syntax: false,
179                    hash_arrow_syntax: false,
180                    wrapper_option: None,
181                    quotes_option: None,
182                    on_scalar_string: false,
183                    on_error: None,
184                })))
185            }
186
187            // Generic function transformations
188            Expression::Function(f) => self.transform_function(*f),
189
190            // Generic aggregate function transformations
191            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
192
193            // Cast transformations
194            Expression::Cast(c) => self.transform_cast(*c),
195
196            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
197            // to ensure float division (only when left isn't already a float cast)
198            Expression::Div(mut op) => {
199                if !Self::is_float_cast(&op.left) {
200                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
201                        this: op.left,
202                        to: DataType::Double { precision: None, scale: None },
203                        trailing_comments: Vec::new(),
204                        double_colon_syntax: false,
205                        format: None,
206                        default: None,
207                    }));
208                }
209                Ok(Expression::Div(op))
210            }
211
212            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
213            Expression::IntDiv(f) => {
214                let cast_x = Expression::Cast(Box::new(Cast {
215                    this: f.this,
216                    to: crate::expressions::DataType::Double { precision: None, scale: None },
217                    trailing_comments: Vec::new(),
218                    double_colon_syntax: false,
219                    format: None,
220                    default: None,
221                }));
222                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
223                Ok(Expression::Cast(Box::new(Cast {
224                    this: div_expr,
225                    to: crate::expressions::DataType::Int { length: None, integer_spelling: true },
226                    trailing_comments: Vec::new(),
227                    double_colon_syntax: false,
228                    format: None,
229                    default: None,
230                })))
231            }
232
233            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
234            Expression::Delete(mut d) => {
235                if d.alias.is_some() {
236                    d.alias = None;
237                    d.alias_explicit_as = false;
238                    // Unqualify all columns in the WHERE clause
239                    if let Some(ref mut where_clause) = d.where_clause {
240                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
241                    }
242                }
243                Ok(Expression::Delete(d))
244            }
245
246            // Pass through everything else
247            _ => Ok(expr),
248        }
249    }
250}
251
252impl PrestoDialect {
253    /// Recursively unqualify columns - remove table qualifiers from Column references
254    fn unqualify_columns(expr: Expression) -> Expression {
255        match expr {
256            Expression::Column(c) => {
257                if c.table.is_some() {
258                    Expression::Column(Column {
259                        name: c.name,
260                        table: None,
261                        join_mark: c.join_mark,
262                        trailing_comments: c.trailing_comments,
263                    })
264                } else {
265                    Expression::Column(c)
266                }
267            }
268            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
269            Expression::Dot(d) => {
270                Expression::Column(Column {
271                    name: d.field,
272                    table: None,
273                    join_mark: false,
274                    trailing_comments: Vec::new(),
275                })
276            }
277            // Recursively walk common binary expression types
278            Expression::And(mut op) => {
279                op.left = Self::unqualify_columns(op.left);
280                op.right = Self::unqualify_columns(op.right);
281                Expression::And(op)
282            }
283            Expression::Or(mut op) => {
284                op.left = Self::unqualify_columns(op.left);
285                op.right = Self::unqualify_columns(op.right);
286                Expression::Or(op)
287            }
288            Expression::Eq(mut op) => {
289                op.left = Self::unqualify_columns(op.left);
290                op.right = Self::unqualify_columns(op.right);
291                Expression::Eq(op)
292            }
293            Expression::Neq(mut op) => {
294                op.left = Self::unqualify_columns(op.left);
295                op.right = Self::unqualify_columns(op.right);
296                Expression::Neq(op)
297            }
298            Expression::Gt(mut op) => {
299                op.left = Self::unqualify_columns(op.left);
300                op.right = Self::unqualify_columns(op.right);
301                Expression::Gt(op)
302            }
303            Expression::Lt(mut op) => {
304                op.left = Self::unqualify_columns(op.left);
305                op.right = Self::unqualify_columns(op.right);
306                Expression::Lt(op)
307            }
308            Expression::Gte(mut op) => {
309                op.left = Self::unqualify_columns(op.left);
310                op.right = Self::unqualify_columns(op.right);
311                Expression::Gte(op)
312            }
313            Expression::Lte(mut op) => {
314                op.left = Self::unqualify_columns(op.left);
315                op.right = Self::unqualify_columns(op.right);
316                Expression::Lte(op)
317            }
318            // Unary operators
319            Expression::Not(mut e) => {
320                e.this = Self::unqualify_columns(e.this);
321                Expression::Not(e)
322            }
323            // Predicates
324            Expression::In(mut i) => {
325                i.this = Self::unqualify_columns(i.this);
326                i.expressions = i.expressions.into_iter().map(Self::unqualify_columns).collect();
327                // Also recurse into subquery if present
328                if let Some(q) = i.query {
329                    i.query = Some(Self::unqualify_columns(q));
330                }
331                Expression::In(i)
332            }
333            Expression::IsNull(mut f) => {
334                f.this = Self::unqualify_columns(f.this);
335                Expression::IsNull(f)
336            }
337            Expression::Paren(mut p) => {
338                p.this = Self::unqualify_columns(p.this);
339                Expression::Paren(p)
340            }
341            Expression::Function(mut f) => {
342                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
343                Expression::Function(f)
344            }
345            // For subqueries (SELECT statements inside IN, etc), also unqualify
346            Expression::Select(mut s) => {
347                s.expressions = s.expressions.into_iter().map(Self::unqualify_columns).collect();
348                if let Some(ref mut w) = s.where_clause {
349                    w.this = Self::unqualify_columns(w.this.clone());
350                }
351                Expression::Select(s)
352            }
353            Expression::Subquery(mut sq) => {
354                sq.this = Self::unqualify_columns(sq.this);
355                Expression::Subquery(sq)
356            }
357            Expression::Alias(mut a) => {
358                a.this = Self::unqualify_columns(a.this);
359                Expression::Alias(a)
360            }
361            // Pass through other expressions unchanged
362            other => other,
363        }
364    }
365
366    /// Check if an expression is already a CAST to a float type
367    fn is_float_cast(expr: &Expression) -> bool {
368        if let Expression::Cast(cast) = expr {
369            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
370        } else {
371            false
372        }
373    }
374
375    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
376    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
377    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
378    pub fn oracle_to_presto_format(fmt: &str) -> String {
379        // Process character by character to avoid double-replacement issues
380        let chars: Vec<char> = fmt.chars().collect();
381        let mut result = String::new();
382        let mut i = 0;
383        while i < chars.len() {
384            let remaining = &fmt[i..];
385            if remaining.starts_with("yyyy") {
386                result.push_str("%Y");
387                i += 4;
388            } else if remaining.starts_with("yy") {
389                result.push_str("%y");
390                i += 2;
391            } else if remaining.starts_with("hh24") {
392                result.push_str("%H");
393                i += 4;
394            } else if remaining.starts_with("hh") {
395                result.push_str("%H");
396                i += 2;
397            } else if remaining.starts_with("mi") {
398                result.push_str("%i");
399                i += 2;
400            } else if remaining.starts_with("mm") {
401                result.push_str("%m");
402                i += 2;
403            } else if remaining.starts_with("dd") {
404                result.push_str("%d");
405                i += 2;
406            } else if remaining.starts_with("ss") {
407                result.push_str("%s");
408                i += 2;
409            } else {
410                result.push(chars[i]);
411                i += 1;
412            }
413        }
414        result
415    }
416
417    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
418    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
419    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
420    pub fn presto_to_java_format(fmt: &str) -> String {
421        fmt.replace("%Y", "yyyy")
422           .replace("%m", "MM")
423           .replace("%d", "dd")
424           .replace("%H", "HH")
425           .replace("%i", "mm")
426           .replace("%S", "ss")
427           .replace("%s", "ss")
428           .replace("%y", "yy")
429           .replace("%T", "HH:mm:ss")
430           .replace("%F", "yyyy-MM-dd")
431           .replace("%M", "MMMM")
432    }
433
434    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
435    pub fn normalize_presto_format(fmt: &str) -> String {
436        fmt.replace("%H:%i:%S", "%T")
437           .replace("%H:%i:%s", "%T")
438    }
439
440    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
441    pub fn presto_to_duckdb_format(fmt: &str) -> String {
442        fmt.replace("%i", "%M")
443           .replace("%s", "%S")
444           .replace("%T", "%H:%M:%S")
445    }
446
447    /// Convert Presto's C-style format to BigQuery format
448    pub fn presto_to_bigquery_format(fmt: &str) -> String {
449        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
450        // BigQuery uses %M for minutes (like DuckDB), not %i
451        let result = fmt.replace("%Y-%m-%d", "%F")
452           .replace("%H:%i:%S", "%T")
453           .replace("%H:%i:%s", "%T")
454           .replace("%i", "%M")
455           .replace("%s", "%S");
456        result
457    }
458
459    /// Check if a Presto format string matches the default timestamp format
460    pub fn is_default_timestamp_format(fmt: &str) -> bool {
461        let normalized = Self::normalize_presto_format(fmt);
462        normalized == "%Y-%m-%d %T" || normalized == "%Y-%m-%d %H:%i:%S"
463            || fmt == "%Y-%m-%d %H:%i:%S" || fmt == "%Y-%m-%d %T"
464    }
465
466    /// Check if a Presto format string matches the default date format
467    pub fn is_default_date_format(fmt: &str) -> bool {
468        fmt == "%Y-%m-%d" || fmt == "%F"
469    }
470
471    fn transform_function(&self, f: Function) -> Result<Expression> {
472        let name_upper = f.name.to_uppercase();
473        match name_upper.as_str() {
474            // IFNULL -> COALESCE
475            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
476                expressions: f.args,
477            }))),
478
479            // NVL -> COALESCE
480            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
481                expressions: f.args,
482            }))),
483
484            // ISNULL -> COALESCE
485            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
486                expressions: f.args,
487            }))),
488
489            // GETDATE -> CURRENT_TIMESTAMP
490            "GETDATE" => Ok(Expression::CurrentTimestamp(
491                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
492            )),
493
494            // NOW -> CURRENT_TIMESTAMP
495            "NOW" => Ok(Expression::CurrentTimestamp(
496                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
497            )),
498
499            // RAND -> RANDOM in Presto (but it's actually RANDOM())
500            "RAND" => Ok(Expression::Function(Box::new(Function::new(
501                "RANDOM".to_string(),
502                vec![],
503            )))),
504
505            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
506            "GROUP_CONCAT" if !f.args.is_empty() => {
507                let mut args = f.args;
508                let first = args.remove(0);
509                let separator = args.pop();
510                let array_agg = Expression::Function(Box::new(Function::new(
511                    "ARRAY_AGG".to_string(),
512                    vec![first],
513                )));
514                let mut join_args = vec![array_agg];
515                if let Some(sep) = separator {
516                    join_args.push(sep);
517                }
518                Ok(Expression::Function(Box::new(Function::new(
519                    "ARRAY_JOIN".to_string(),
520                    join_args,
521                ))))
522            }
523
524            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
525            "STRING_AGG" if !f.args.is_empty() => {
526                let mut args = f.args;
527                let first = args.remove(0);
528                let separator = args.pop();
529                let array_agg = Expression::Function(Box::new(Function::new(
530                    "ARRAY_AGG".to_string(),
531                    vec![first],
532                )));
533                let mut join_args = vec![array_agg];
534                if let Some(sep) = separator {
535                    join_args.push(sep);
536                }
537                Ok(Expression::Function(Box::new(Function::new(
538                    "ARRAY_JOIN".to_string(),
539                    join_args,
540                ))))
541            }
542
543            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
544            "LISTAGG" if !f.args.is_empty() => {
545                let mut args = f.args;
546                let first = args.remove(0);
547                let separator = args.pop();
548                let array_agg = Expression::Function(Box::new(Function::new(
549                    "ARRAY_AGG".to_string(),
550                    vec![first],
551                )));
552                let mut join_args = vec![array_agg];
553                if let Some(sep) = separator {
554                    join_args.push(sep);
555                }
556                Ok(Expression::Function(Box::new(Function::new(
557                    "ARRAY_JOIN".to_string(),
558                    join_args,
559                ))))
560            }
561
562            // SUBSTR -> SUBSTRING (both work, but SUBSTRING is standard)
563            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
564                "SUBSTRING".to_string(),
565                f.args,
566            )))),
567
568            // LEN -> LENGTH
569            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
570                f.args.into_iter().next().unwrap(),
571            )))),
572
573            // CHARINDEX -> STRPOS in Presto (with swapped args)
574            "CHARINDEX" if f.args.len() >= 2 => {
575                let mut args = f.args;
576                let substring = args.remove(0);
577                let string = args.remove(0);
578                // STRPOS(string, substring) - note: argument order is reversed
579                Ok(Expression::Function(Box::new(Function::new(
580                    "STRPOS".to_string(),
581                    vec![string, substring],
582                ))))
583            }
584
585            // INSTR -> STRPOS (with same argument order)
586            "INSTR" if f.args.len() >= 2 => {
587                let args = f.args;
588                // INSTR(string, substring) -> STRPOS(string, substring)
589                Ok(Expression::Function(Box::new(Function::new(
590                    "STRPOS".to_string(),
591                    args,
592                ))))
593            }
594
595            // LOCATE -> STRPOS in Presto (with swapped args)
596            "LOCATE" if f.args.len() >= 2 => {
597                let mut args = f.args;
598                let substring = args.remove(0);
599                let string = args.remove(0);
600                // LOCATE(substring, string) -> STRPOS(string, substring)
601                Ok(Expression::Function(Box::new(Function::new(
602                    "STRPOS".to_string(),
603                    vec![string, substring],
604                ))))
605            }
606
607            // ARRAY_LENGTH -> CARDINALITY in Presto
608            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
609                Function::new("CARDINALITY".to_string(), f.args),
610            ))),
611
612            // SIZE -> CARDINALITY in Presto
613            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
614                "CARDINALITY".to_string(),
615                f.args,
616            )))),
617
618            // ARRAY_CONTAINS -> CONTAINS in Presto
619            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
620                Function::new("CONTAINS".to_string(), f.args),
621            ))),
622
623            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
624            "TO_DATE" if !f.args.is_empty() => {
625                if f.args.len() == 1 {
626                    // Simple case: just cast to DATE
627                    Ok(Expression::Cast(Box::new(Cast {
628                        this: f.args.into_iter().next().unwrap(),
629                        to: DataType::Date,
630                        trailing_comments: Vec::new(),
631                        double_colon_syntax: false,
632                        format: None,
633                        default: None,
634                    })))
635                } else {
636                    // With format: use DATE_PARSE
637                    Ok(Expression::Function(Box::new(Function::new(
638                        "DATE_PARSE".to_string(),
639                        f.args,
640                    ))))
641                }
642            }
643
644            // TO_TIMESTAMP -> DATE_PARSE / CAST
645            "TO_TIMESTAMP" if !f.args.is_empty() => {
646                if f.args.len() == 1 {
647                    Ok(Expression::Cast(Box::new(Cast {
648                        this: f.args.into_iter().next().unwrap(),
649                        to: DataType::Timestamp { precision: None, timezone: false },
650                        trailing_comments: Vec::new(),
651                        double_colon_syntax: false,
652                        format: None,
653                        default: None,
654                    })))
655                } else {
656                    Ok(Expression::Function(Box::new(Function::new(
657                        "DATE_PARSE".to_string(),
658                        f.args,
659                    ))))
660                }
661            }
662
663            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
664            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
665
666            // strftime -> DATE_FORMAT in Presto
667            "STRFTIME" if f.args.len() >= 2 => {
668                let mut args = f.args;
669                // strftime(format, date) -> DATE_FORMAT(date, format)
670                let format = args.remove(0);
671                let date = args.remove(0);
672                Ok(Expression::Function(Box::new(Function::new(
673                    "DATE_FORMAT".to_string(),
674                    vec![date, format],
675                ))))
676            }
677
678            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
679            "TO_CHAR" if f.args.len() >= 2 => {
680                let mut args = f.args;
681                // Convert Oracle-style format string to Presto C-style
682                if let Expression::Literal(Literal::String(ref s)) = args[1] {
683                    let converted = Self::oracle_to_presto_format(s);
684                    args[1] = Expression::Literal(Literal::String(converted));
685                }
686                Ok(Expression::Function(Box::new(Function::new(
687                    "DATE_FORMAT".to_string(),
688                    args,
689                ))))
690            }
691
692            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
693            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
694                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
695            ))),
696
697            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
698            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
699
700            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
701            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
702
703            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
704            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
705
706            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
707            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
708                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
709            ))),
710
711            // COLLECT_LIST -> ARRAY_AGG
712            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
713                Function::new("ARRAY_AGG".to_string(), f.args),
714            ))),
715
716            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
717            "COLLECT_SET" if !f.args.is_empty() => {
718                let array_agg = Expression::Function(Box::new(Function::new(
719                    "ARRAY_AGG".to_string(),
720                    f.args,
721                )));
722                Ok(Expression::Function(Box::new(Function::new(
723                    "ARRAY_DISTINCT".to_string(),
724                    vec![array_agg],
725                ))))
726            }
727
728            // RLIKE -> REGEXP_LIKE in Presto
729            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
730                "REGEXP_LIKE".to_string(),
731                f.args,
732            )))),
733
734            // REGEXP -> REGEXP_LIKE in Presto
735            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
736                "REGEXP_LIKE".to_string(),
737                f.args,
738            )))),
739
740            // PARSE_JSON -> JSON_PARSE in Presto
741            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
742                "JSON_PARSE".to_string(),
743                f.args,
744            )))),
745
746            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
747            "GET_PATH" if f.args.len() == 2 => {
748                let mut args = f.args;
749                let this = args.remove(0);
750                let path = args.remove(0);
751                let json_path = match &path {
752                    Expression::Literal(Literal::String(s)) => {
753                        let normalized = if s.starts_with('$') {
754                            s.clone()
755                        } else if s.starts_with('[') {
756                            format!("${}", s)
757                        } else {
758                            format!("$.{}", s)
759                        };
760                        Expression::Literal(Literal::String(normalized))
761                    }
762                    _ => path,
763                };
764                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
765                    this,
766                    path: json_path,
767                    returning: None,
768                    arrow_syntax: false,
769                    hash_arrow_syntax: false,
770                    wrapper_option: None,
771                    quotes_option: None,
772                    on_scalar_string: false,
773                    on_error: None,
774                })))
775            }
776
777            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
778            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
779                let mut args = f.args;
780                let subject = args.remove(0);
781                let pattern = args.remove(0);
782                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
783                if args.len() >= 4 {
784                    let _pos = args.remove(0);
785                    let _occ = args.remove(0);
786                    let _params = args.remove(0);
787                    let group = args.remove(0);
788                    Ok(Expression::Function(Box::new(Function::new(
789                        "REGEXP_EXTRACT".to_string(),
790                        vec![subject, pattern, group],
791                    ))))
792                } else {
793                    Ok(Expression::Function(Box::new(Function::new(
794                        "REGEXP_EXTRACT".to_string(),
795                        vec![subject, pattern],
796                    ))))
797                }
798            }
799
800            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
801            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
802            "DATE_PART" if f.args.len() == 2 => {
803                let part_name = match &f.args[0] {
804                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
805                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
806                    _ => None,
807                };
808                match part_name.as_deref() {
809                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
810                        let mut args = f.args;
811                        let value = args.remove(1);
812                        let cast_expr = Expression::Cast(Box::new(Cast {
813                            this: value,
814                            to: DataType::Timestamp { precision: None, timezone: false },
815                            trailing_comments: Vec::new(),
816                            double_colon_syntax: false,
817                            format: None,
818                            default: None,
819                        }));
820                        Ok(Expression::Function(Box::new(Function::new(
821                            "TO_UNIXTIME".to_string(),
822                            vec![cast_expr],
823                        ))))
824                    }
825                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
826                        let mut args = f.args;
827                        let value = args.remove(1);
828                        let cast_expr = Expression::Cast(Box::new(Cast {
829                            this: value,
830                            to: DataType::Timestamp { precision: None, timezone: false },
831                            trailing_comments: Vec::new(),
832                            double_colon_syntax: false,
833                            format: None,
834                            default: None,
835                        }));
836                        let unixtime = Expression::Function(Box::new(Function::new(
837                            "TO_UNIXTIME".to_string(),
838                            vec![cast_expr],
839                        )));
840                        Ok(Expression::Mul(Box::new(BinaryOp {
841                            left: unixtime,
842                            right: Expression::Literal(Literal::Number("1000".to_string())),
843                            left_comments: Vec::new(),
844                            operator_comments: Vec::new(),
845                            trailing_comments: Vec::new(),
846                        })))
847                    }
848                    _ => Ok(Expression::Function(Box::new(f))),
849                }
850            }
851
852            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
853            "REPLACE" if f.args.len() == 2 => {
854                let mut args = f.args;
855                args.push(Expression::string(""));
856                Ok(Expression::Function(Box::new(Function::new("REPLACE".to_string(), args))))
857            }
858
859            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
860            "REGEXP_REPLACE" if f.args.len() == 2 => {
861                let mut args = f.args;
862                args.push(Expression::string(""));
863                Ok(Expression::Function(Box::new(Function::new("REGEXP_REPLACE".to_string(), args))))
864            }
865
866            // Pass through everything else
867            _ => Ok(Expression::Function(Box::new(f))),
868        }
869    }
870
871    fn transform_aggregate_function(
872        &self,
873        f: Box<crate::expressions::AggregateFunction>,
874    ) -> Result<Expression> {
875        let name_upper = f.name.to_uppercase();
876        match name_upper.as_str() {
877            // COUNT_IF -> SUM(CASE WHEN...)
878            "COUNT_IF" if !f.args.is_empty() => {
879                let condition = f.args.into_iter().next().unwrap();
880                let case_expr = Expression::Case(Box::new(Case {
881                    operand: None,
882                    whens: vec![(condition, Expression::number(1))],
883                    else_: Some(Expression::number(0)),
884                }));
885                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
886                    this: case_expr,
887                    distinct: f.distinct,
888                    filter: f.filter,
889                    order_by: Vec::new(),
890                name: None,
891                limit: None,
892                })))
893            }
894
895            // ANY_VALUE -> ARBITRARY in Presto
896            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
897                "ARBITRARY".to_string(),
898                f.args,
899            )))),
900
901            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
902            "GROUP_CONCAT" if !f.args.is_empty() => {
903                let mut args = f.args;
904                let first = args.remove(0);
905                let separator = args.pop();
906                let array_agg = Expression::Function(Box::new(Function::new(
907                    "ARRAY_AGG".to_string(),
908                    vec![first],
909                )));
910                let mut join_args = vec![array_agg];
911                if let Some(sep) = separator {
912                    join_args.push(sep);
913                }
914                Ok(Expression::Function(Box::new(Function::new(
915                    "ARRAY_JOIN".to_string(),
916                    join_args,
917                ))))
918            }
919
920            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
921            "STRING_AGG" if !f.args.is_empty() => {
922                let mut args = f.args;
923                let first = args.remove(0);
924                let separator = args.pop();
925                let array_agg = Expression::Function(Box::new(Function::new(
926                    "ARRAY_AGG".to_string(),
927                    vec![first],
928                )));
929                let mut join_args = vec![array_agg];
930                if let Some(sep) = separator {
931                    join_args.push(sep);
932                }
933                Ok(Expression::Function(Box::new(Function::new(
934                    "ARRAY_JOIN".to_string(),
935                    join_args,
936                ))))
937            }
938
939            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
940            "LISTAGG" if !f.args.is_empty() => {
941                let mut args = f.args;
942                let first = args.remove(0);
943                let separator = args.pop();
944                let array_agg = Expression::Function(Box::new(Function::new(
945                    "ARRAY_AGG".to_string(),
946                    vec![first],
947                )));
948                let mut join_args = vec![array_agg];
949                if let Some(sep) = separator {
950                    join_args.push(sep);
951                }
952                Ok(Expression::Function(Box::new(Function::new(
953                    "ARRAY_JOIN".to_string(),
954                    join_args,
955                ))))
956            }
957
958            // VAR -> VAR_POP in Presto
959            "VAR" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
960                AggregateFunction {
961                    name: "VAR_POP".to_string(),
962                    args: f.args,
963                    distinct: f.distinct,
964                    filter: f.filter,
965                    order_by: Vec::new(),
966                    limit: None,
967                    ignore_nulls: None,
968                },
969            ))),
970
971            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
972            "VARIANCE" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
973                AggregateFunction {
974                    name: "VAR_SAMP".to_string(),
975                    args: f.args,
976                    distinct: f.distinct,
977                    filter: f.filter,
978                    order_by: Vec::new(),
979                    limit: None,
980                    ignore_nulls: None,
981                },
982            ))),
983
984            // Pass through everything else
985            _ => Ok(Expression::AggregateFunction(f)),
986        }
987    }
988
989    fn transform_cast(&self, c: Cast) -> Result<Expression> {
990        // Presto type mappings are handled in the generator
991        Ok(Expression::Cast(Box::new(c)))
992    }
993}