Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Presto dialect.
///
/// Zero-sized marker type: it carries no state, and all dialect behaviour is
/// provided through its `DialectImpl` implementation and its inherent helper
/// functions.
pub struct PrestoDialect;
17
18impl DialectImpl for PrestoDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Presto
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Presto uses double quotes for identifiers
26        config.identifiers.insert('"', '"');
27        // Presto does NOT support nested comments
28        config.nested_comments = false;
29        // Presto does NOT support QUALIFY - it's a valid identifier
30        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
31        config.keywords.remove("QUALIFY");
32        config
33    }
34
35    fn generator_config(&self) -> GeneratorConfig {
36        use crate::generator::IdentifierQuoteStyle;
37        GeneratorConfig {
38            identifier_quote: '"',
39            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
40            dialect: Some(DialectType::Presto),
41            limit_only_literals: true,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Presto
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
51                original_name: None,
52                expressions: vec![f.this, f.expression],
53                inferred_type: None,
54            }))),
55
56            // NVL -> COALESCE in Presto
57            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
58                original_name: None,
59                expressions: vec![f.this, f.expression],
60                inferred_type: None,
61            }))),
62
63            // TryCast stays as TryCast (Presto supports TRY_CAST)
64            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
65
66            // SafeCast -> TRY_CAST in Presto
67            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
68
69            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
70            Expression::ILike(op) => {
71                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
72                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
73                Ok(Expression::Like(Box::new(LikeOp {
74                    left: lower_left,
75                    right: lower_right,
76                    escape: op.escape,
77                    quantifier: op.quantifier.clone(),
78                    inferred_type: None,
79                })))
80            }
81
82            // CountIf is native in Presto (keep as-is)
83            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
84
85            // EXPLODE -> UNNEST in Presto
86            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
87                crate::expressions::UnnestFunc {
88                    this: f.this,
89                    expressions: Vec::new(),
90                    with_ordinality: false,
91                    alias: None,
92                    offset_alias: None,
93                },
94            ))),
95
96            // ExplodeOuter -> UNNEST in Presto
97            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
98                crate::expressions::UnnestFunc {
99                    this: f.this,
100                    expressions: Vec::new(),
101                    with_ordinality: false,
102                    alias: None,
103                    offset_alias: None,
104                },
105            ))),
106
107            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
108            Expression::StringAgg(f) => {
109                let array_agg = Expression::Function(Box::new(Function::new(
110                    "ARRAY_AGG".to_string(),
111                    vec![f.this.clone()],
112                )));
113                let mut join_args = vec![array_agg];
114                if let Some(sep) = f.separator {
115                    join_args.push(sep);
116                }
117                Ok(Expression::Function(Box::new(Function::new(
118                    "ARRAY_JOIN".to_string(),
119                    join_args,
120                ))))
121            }
122
123            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
124            Expression::GroupConcat(f) => {
125                let array_agg = Expression::Function(Box::new(Function::new(
126                    "ARRAY_AGG".to_string(),
127                    vec![f.this.clone()],
128                )));
129                let mut join_args = vec![array_agg];
130                if let Some(sep) = f.separator {
131                    join_args.push(sep);
132                }
133                Ok(Expression::Function(Box::new(Function::new(
134                    "ARRAY_JOIN".to_string(),
135                    join_args,
136                ))))
137            }
138
139            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
140            Expression::ListAgg(f) => {
141                let array_agg = Expression::Function(Box::new(Function::new(
142                    "ARRAY_AGG".to_string(),
143                    vec![f.this.clone()],
144                )));
145                let mut join_args = vec![array_agg];
146                if let Some(sep) = f.separator {
147                    join_args.push(sep);
148                }
149                Ok(Expression::Function(Box::new(Function::new(
150                    "ARRAY_JOIN".to_string(),
151                    join_args,
152                ))))
153            }
154
155            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
156
157            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
158            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
159                let path = match *e.expression {
160                    Expression::Literal(Literal::String(s)) => {
161                        let normalized = if s.starts_with('$') {
162                            s
163                        } else if s.starts_with('[') {
164                            format!("${}", s)
165                        } else {
166                            format!("$.{}", s)
167                        };
168                        Expression::Literal(Literal::String(normalized))
169                    }
170                    other => other,
171                };
172                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
173                    this: *e.this,
174                    path,
175                    returning: None,
176                    arrow_syntax: false,
177                    hash_arrow_syntax: false,
178                    wrapper_option: None,
179                    quotes_option: None,
180                    on_scalar_string: false,
181                    on_error: None,
182                })))
183            }
184
185            // Generic function transformations
186            Expression::Function(f) => self.transform_function(*f),
187
188            // Generic aggregate function transformations
189            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
190
191            // Cast transformations
192            Expression::Cast(c) => self.transform_cast(*c),
193
194            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
195            // to ensure float division (only when left isn't already a float cast)
196            Expression::Div(mut op) => {
197                if !Self::is_float_cast(&op.left) {
198                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
199                        this: op.left,
200                        to: DataType::Double {
201                            precision: None,
202                            scale: None,
203                        },
204                        trailing_comments: Vec::new(),
205                        double_colon_syntax: false,
206                        format: None,
207                        default: None,
208                        inferred_type: None,
209                    }));
210                }
211                Ok(Expression::Div(op))
212            }
213
214            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
215            Expression::IntDiv(f) => {
216                let cast_x = Expression::Cast(Box::new(Cast {
217                    this: f.this,
218                    to: crate::expressions::DataType::Double {
219                        precision: None,
220                        scale: None,
221                    },
222                    trailing_comments: Vec::new(),
223                    double_colon_syntax: false,
224                    format: None,
225                    default: None,
226                    inferred_type: None,
227                }));
228                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
229                Ok(Expression::Cast(Box::new(Cast {
230                    this: div_expr,
231                    to: crate::expressions::DataType::Int {
232                        length: None,
233                        integer_spelling: true,
234                    },
235                    trailing_comments: Vec::new(),
236                    double_colon_syntax: false,
237                    format: None,
238                    default: None,
239                    inferred_type: None,
240                })))
241            }
242
243            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
244            Expression::Delete(mut d) => {
245                if d.alias.is_some() {
246                    d.alias = None;
247                    d.alias_explicit_as = false;
248                    // Unqualify all columns in the WHERE clause
249                    if let Some(ref mut where_clause) = d.where_clause {
250                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
251                    }
252                }
253                Ok(Expression::Delete(d))
254            }
255
256            // Pass through everything else
257            _ => Ok(expr),
258        }
259    }
260}
261
262impl PrestoDialect {
263    /// Recursively unqualify columns - remove table qualifiers from Column references
264    fn unqualify_columns(expr: Expression) -> Expression {
265        match expr {
266            Expression::Column(c) => {
267                if c.table.is_some() {
268                    Expression::Column(Column {
269                        name: c.name,
270                        table: None,
271                        join_mark: c.join_mark,
272                        trailing_comments: c.trailing_comments,
273                        span: None,
274                        inferred_type: None,
275                    })
276                } else {
277                    Expression::Column(c)
278                }
279            }
280            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
281            Expression::Dot(d) => Expression::Column(Column {
282                name: d.field,
283                table: None,
284                join_mark: false,
285                trailing_comments: Vec::new(),
286                span: None,
287                inferred_type: None,
288            }),
289            // Recursively walk common binary expression types
290            Expression::And(mut op) => {
291                op.left = Self::unqualify_columns(op.left);
292                op.right = Self::unqualify_columns(op.right);
293                Expression::And(op)
294            }
295            Expression::Or(mut op) => {
296                op.left = Self::unqualify_columns(op.left);
297                op.right = Self::unqualify_columns(op.right);
298                Expression::Or(op)
299            }
300            Expression::Eq(mut op) => {
301                op.left = Self::unqualify_columns(op.left);
302                op.right = Self::unqualify_columns(op.right);
303                Expression::Eq(op)
304            }
305            Expression::Neq(mut op) => {
306                op.left = Self::unqualify_columns(op.left);
307                op.right = Self::unqualify_columns(op.right);
308                Expression::Neq(op)
309            }
310            Expression::Gt(mut op) => {
311                op.left = Self::unqualify_columns(op.left);
312                op.right = Self::unqualify_columns(op.right);
313                Expression::Gt(op)
314            }
315            Expression::Lt(mut op) => {
316                op.left = Self::unqualify_columns(op.left);
317                op.right = Self::unqualify_columns(op.right);
318                Expression::Lt(op)
319            }
320            Expression::Gte(mut op) => {
321                op.left = Self::unqualify_columns(op.left);
322                op.right = Self::unqualify_columns(op.right);
323                Expression::Gte(op)
324            }
325            Expression::Lte(mut op) => {
326                op.left = Self::unqualify_columns(op.left);
327                op.right = Self::unqualify_columns(op.right);
328                Expression::Lte(op)
329            }
330            // Unary operators
331            Expression::Not(mut e) => {
332                e.this = Self::unqualify_columns(e.this);
333                Expression::Not(e)
334            }
335            // Predicates
336            Expression::In(mut i) => {
337                i.this = Self::unqualify_columns(i.this);
338                i.expressions = i
339                    .expressions
340                    .into_iter()
341                    .map(Self::unqualify_columns)
342                    .collect();
343                // Also recurse into subquery if present
344                if let Some(q) = i.query {
345                    i.query = Some(Self::unqualify_columns(q));
346                }
347                Expression::In(i)
348            }
349            Expression::IsNull(mut f) => {
350                f.this = Self::unqualify_columns(f.this);
351                Expression::IsNull(f)
352            }
353            Expression::Paren(mut p) => {
354                p.this = Self::unqualify_columns(p.this);
355                Expression::Paren(p)
356            }
357            Expression::Function(mut f) => {
358                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
359                Expression::Function(f)
360            }
361            // For subqueries (SELECT statements inside IN, etc), also unqualify
362            Expression::Select(mut s) => {
363                s.expressions = s
364                    .expressions
365                    .into_iter()
366                    .map(Self::unqualify_columns)
367                    .collect();
368                if let Some(ref mut w) = s.where_clause {
369                    w.this = Self::unqualify_columns(w.this.clone());
370                }
371                Expression::Select(s)
372            }
373            Expression::Subquery(mut sq) => {
374                sq.this = Self::unqualify_columns(sq.this);
375                Expression::Subquery(sq)
376            }
377            Expression::Alias(mut a) => {
378                a.this = Self::unqualify_columns(a.this);
379                Expression::Alias(a)
380            }
381            // Pass through other expressions unchanged
382            other => other,
383        }
384    }
385
386    /// Check if an expression is already a CAST to a float type
387    fn is_float_cast(expr: &Expression) -> bool {
388        if let Expression::Cast(cast) = expr {
389            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
390        } else {
391            false
392        }
393    }
394
395    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
396    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
397    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
398    pub fn oracle_to_presto_format(fmt: &str) -> String {
399        // Process character by character to avoid double-replacement issues
400        let chars: Vec<char> = fmt.chars().collect();
401        let mut result = String::new();
402        let mut i = 0;
403        while i < chars.len() {
404            let remaining = &fmt[i..];
405            if remaining.starts_with("yyyy") {
406                result.push_str("%Y");
407                i += 4;
408            } else if remaining.starts_with("yy") {
409                result.push_str("%y");
410                i += 2;
411            } else if remaining.starts_with("hh24") {
412                result.push_str("%H");
413                i += 4;
414            } else if remaining.starts_with("hh") {
415                result.push_str("%H");
416                i += 2;
417            } else if remaining.starts_with("mi") {
418                result.push_str("%i");
419                i += 2;
420            } else if remaining.starts_with("mm") {
421                result.push_str("%m");
422                i += 2;
423            } else if remaining.starts_with("dd") {
424                result.push_str("%d");
425                i += 2;
426            } else if remaining.starts_with("ss") {
427                result.push_str("%s");
428                i += 2;
429            } else {
430                result.push(chars[i]);
431                i += 1;
432            }
433        }
434        result
435    }
436
437    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
438    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
439    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
440    pub fn presto_to_java_format(fmt: &str) -> String {
441        fmt.replace("%Y", "yyyy")
442            .replace("%m", "MM")
443            .replace("%d", "dd")
444            .replace("%H", "HH")
445            .replace("%i", "mm")
446            .replace("%S", "ss")
447            .replace("%s", "ss")
448            .replace("%y", "yy")
449            .replace("%T", "HH:mm:ss")
450            .replace("%F", "yyyy-MM-dd")
451            .replace("%M", "MMMM")
452    }
453
454    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
455    pub fn normalize_presto_format(fmt: &str) -> String {
456        fmt.replace("%H:%i:%S", "%T").replace("%H:%i:%s", "%T")
457    }
458
459    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
460    pub fn presto_to_duckdb_format(fmt: &str) -> String {
461        fmt.replace("%i", "%M")
462            .replace("%s", "%S")
463            .replace("%T", "%H:%M:%S")
464    }
465
466    /// Convert Presto's C-style format to BigQuery format
467    pub fn presto_to_bigquery_format(fmt: &str) -> String {
468        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
469        // BigQuery uses %M for minutes (like DuckDB), not %i
470        let result = fmt
471            .replace("%Y-%m-%d", "%F")
472            .replace("%H:%i:%S", "%T")
473            .replace("%H:%i:%s", "%T")
474            .replace("%i", "%M")
475            .replace("%s", "%S");
476        result
477    }
478
479    /// Check if a Presto format string matches the default timestamp format
480    pub fn is_default_timestamp_format(fmt: &str) -> bool {
481        let normalized = Self::normalize_presto_format(fmt);
482        normalized == "%Y-%m-%d %T"
483            || normalized == "%Y-%m-%d %H:%i:%S"
484            || fmt == "%Y-%m-%d %H:%i:%S"
485            || fmt == "%Y-%m-%d %T"
486    }
487
488    /// Check if a Presto format string matches the default date format
489    pub fn is_default_date_format(fmt: &str) -> bool {
490        fmt == "%Y-%m-%d" || fmt == "%F"
491    }
492
493    fn transform_function(&self, f: Function) -> Result<Expression> {
494        let name_upper = f.name.to_uppercase();
495        match name_upper.as_str() {
496            // IFNULL -> COALESCE
497            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
498                original_name: None,
499                expressions: f.args,
500                inferred_type: None,
501            }))),
502
503            // NVL -> COALESCE
504            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
505                original_name: None,
506                expressions: f.args,
507                inferred_type: None,
508            }))),
509
510            // ISNULL -> COALESCE
511            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
512                original_name: None,
513                expressions: f.args,
514                inferred_type: None,
515            }))),
516
517            // GETDATE -> CURRENT_TIMESTAMP
518            "GETDATE" => Ok(Expression::CurrentTimestamp(
519                crate::expressions::CurrentTimestamp {
520                    precision: None,
521                    sysdate: false,
522                },
523            )),
524
525            // NOW -> CURRENT_TIMESTAMP
526            "NOW" => Ok(Expression::CurrentTimestamp(
527                crate::expressions::CurrentTimestamp {
528                    precision: None,
529                    sysdate: false,
530                },
531            )),
532
533            // RAND -> RANDOM in Presto (but it's actually RANDOM())
534            "RAND" => Ok(Expression::Function(Box::new(Function::new(
535                "RANDOM".to_string(),
536                vec![],
537            )))),
538
539            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
540            "GROUP_CONCAT" if !f.args.is_empty() => {
541                let mut args = f.args;
542                let first = args.remove(0);
543                let separator = args.pop();
544                let array_agg = Expression::Function(Box::new(Function::new(
545                    "ARRAY_AGG".to_string(),
546                    vec![first],
547                )));
548                let mut join_args = vec![array_agg];
549                if let Some(sep) = separator {
550                    join_args.push(sep);
551                }
552                Ok(Expression::Function(Box::new(Function::new(
553                    "ARRAY_JOIN".to_string(),
554                    join_args,
555                ))))
556            }
557
558            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
559            "STRING_AGG" if !f.args.is_empty() => {
560                let mut args = f.args;
561                let first = args.remove(0);
562                let separator = args.pop();
563                let array_agg = Expression::Function(Box::new(Function::new(
564                    "ARRAY_AGG".to_string(),
565                    vec![first],
566                )));
567                let mut join_args = vec![array_agg];
568                if let Some(sep) = separator {
569                    join_args.push(sep);
570                }
571                Ok(Expression::Function(Box::new(Function::new(
572                    "ARRAY_JOIN".to_string(),
573                    join_args,
574                ))))
575            }
576
577            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
578            "LISTAGG" if !f.args.is_empty() => {
579                let mut args = f.args;
580                let first = args.remove(0);
581                let separator = args.pop();
582                let array_agg = Expression::Function(Box::new(Function::new(
583                    "ARRAY_AGG".to_string(),
584                    vec![first],
585                )));
586                let mut join_args = vec![array_agg];
587                if let Some(sep) = separator {
588                    join_args.push(sep);
589                }
590                Ok(Expression::Function(Box::new(Function::new(
591                    "ARRAY_JOIN".to_string(),
592                    join_args,
593                ))))
594            }
595
596            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
597            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
598
599            // LEN -> LENGTH
600            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
601                f.args.into_iter().next().unwrap(),
602            )))),
603
604            // CHARINDEX -> STRPOS in Presto (with swapped args)
605            "CHARINDEX" if f.args.len() >= 2 => {
606                let mut args = f.args;
607                let substring = args.remove(0);
608                let string = args.remove(0);
609                // STRPOS(string, substring) - note: argument order is reversed
610                Ok(Expression::Function(Box::new(Function::new(
611                    "STRPOS".to_string(),
612                    vec![string, substring],
613                ))))
614            }
615
616            // INSTR -> STRPOS (with same argument order)
617            "INSTR" if f.args.len() >= 2 => {
618                let args = f.args;
619                // INSTR(string, substring) -> STRPOS(string, substring)
620                Ok(Expression::Function(Box::new(Function::new(
621                    "STRPOS".to_string(),
622                    args,
623                ))))
624            }
625
626            // LOCATE -> STRPOS in Presto (with swapped args)
627            "LOCATE" if f.args.len() >= 2 => {
628                let mut args = f.args;
629                let substring = args.remove(0);
630                let string = args.remove(0);
631                // LOCATE(substring, string) -> STRPOS(string, substring)
632                Ok(Expression::Function(Box::new(Function::new(
633                    "STRPOS".to_string(),
634                    vec![string, substring],
635                ))))
636            }
637
638            // ARRAY_LENGTH -> CARDINALITY in Presto
639            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
640                Function::new("CARDINALITY".to_string(), f.args),
641            ))),
642
643            // SIZE -> CARDINALITY in Presto
644            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
645                "CARDINALITY".to_string(),
646                f.args,
647            )))),
648
649            // ARRAY_CONTAINS -> CONTAINS in Presto
650            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
651                Function::new("CONTAINS".to_string(), f.args),
652            ))),
653
654            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
655            "TO_DATE" if !f.args.is_empty() => {
656                if f.args.len() == 1 {
657                    // Simple case: just cast to DATE
658                    Ok(Expression::Cast(Box::new(Cast {
659                        this: f.args.into_iter().next().unwrap(),
660                        to: DataType::Date,
661                        trailing_comments: Vec::new(),
662                        double_colon_syntax: false,
663                        format: None,
664                        default: None,
665                        inferred_type: None,
666                    })))
667                } else {
668                    // With format: use DATE_PARSE
669                    Ok(Expression::Function(Box::new(Function::new(
670                        "DATE_PARSE".to_string(),
671                        f.args,
672                    ))))
673                }
674            }
675
676            // TO_TIMESTAMP -> DATE_PARSE / CAST
677            "TO_TIMESTAMP" if !f.args.is_empty() => {
678                if f.args.len() == 1 {
679                    Ok(Expression::Cast(Box::new(Cast {
680                        this: f.args.into_iter().next().unwrap(),
681                        to: DataType::Timestamp {
682                            precision: None,
683                            timezone: false,
684                        },
685                        trailing_comments: Vec::new(),
686                        double_colon_syntax: false,
687                        format: None,
688                        default: None,
689                        inferred_type: None,
690                    })))
691                } else {
692                    Ok(Expression::Function(Box::new(Function::new(
693                        "DATE_PARSE".to_string(),
694                        f.args,
695                    ))))
696                }
697            }
698
699            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
700            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
701
702            // strftime -> DATE_FORMAT in Presto
703            "STRFTIME" if f.args.len() >= 2 => {
704                let mut args = f.args;
705                // strftime(format, date) -> DATE_FORMAT(date, format)
706                let format = args.remove(0);
707                let date = args.remove(0);
708                Ok(Expression::Function(Box::new(Function::new(
709                    "DATE_FORMAT".to_string(),
710                    vec![date, format],
711                ))))
712            }
713
714            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
715            "TO_CHAR" if f.args.len() >= 2 => {
716                let mut args = f.args;
717                // Convert Oracle-style format string to Presto C-style
718                if let Expression::Literal(Literal::String(ref s)) = args[1] {
719                    let converted = Self::oracle_to_presto_format(s);
720                    args[1] = Expression::Literal(Literal::String(converted));
721                }
722                Ok(Expression::Function(Box::new(Function::new(
723                    "DATE_FORMAT".to_string(),
724                    args,
725                ))))
726            }
727
728            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
729            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
730                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
731            ))),
732
733            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
734            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
735
736            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
737            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
738
739            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
740            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
741
742            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
743            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
744                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
745            ))),
746
747            // COLLECT_LIST -> ARRAY_AGG
748            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
749                Function::new("ARRAY_AGG".to_string(), f.args),
750            ))),
751
752            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
753            "COLLECT_SET" if !f.args.is_empty() => {
754                let array_agg =
755                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
756                Ok(Expression::Function(Box::new(Function::new(
757                    "ARRAY_DISTINCT".to_string(),
758                    vec![array_agg],
759                ))))
760            }
761
762            // RLIKE -> REGEXP_LIKE in Presto
763            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
764                "REGEXP_LIKE".to_string(),
765                f.args,
766            )))),
767
768            // REGEXP -> REGEXP_LIKE in Presto
769            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
770                "REGEXP_LIKE".to_string(),
771                f.args,
772            )))),
773
774            // PARSE_JSON -> JSON_PARSE in Presto
775            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
776                "JSON_PARSE".to_string(),
777                f.args,
778            )))),
779
780            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
781            "GET_PATH" if f.args.len() == 2 => {
782                let mut args = f.args;
783                let this = args.remove(0);
784                let path = args.remove(0);
785                let json_path = match &path {
786                    Expression::Literal(Literal::String(s)) => {
787                        let normalized = if s.starts_with('$') {
788                            s.clone()
789                        } else if s.starts_with('[') {
790                            format!("${}", s)
791                        } else {
792                            format!("$.{}", s)
793                        };
794                        Expression::Literal(Literal::String(normalized))
795                    }
796                    _ => path,
797                };
798                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
799                    this,
800                    path: json_path,
801                    returning: None,
802                    arrow_syntax: false,
803                    hash_arrow_syntax: false,
804                    wrapper_option: None,
805                    quotes_option: None,
806                    on_scalar_string: false,
807                    on_error: None,
808                })))
809            }
810
811            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
812            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
813                let mut args = f.args;
814                let subject = args.remove(0);
815                let pattern = args.remove(0);
816                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
817                if args.len() >= 4 {
818                    let _pos = args.remove(0);
819                    let _occ = args.remove(0);
820                    let _params = args.remove(0);
821                    let group = args.remove(0);
822                    Ok(Expression::Function(Box::new(Function::new(
823                        "REGEXP_EXTRACT".to_string(),
824                        vec![subject, pattern, group],
825                    ))))
826                } else {
827                    Ok(Expression::Function(Box::new(Function::new(
828                        "REGEXP_EXTRACT".to_string(),
829                        vec![subject, pattern],
830                    ))))
831                }
832            }
833
834            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
835            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
836            "DATE_PART" if f.args.len() == 2 => {
837                let part_name = match &f.args[0] {
838                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
839                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
840                    _ => None,
841                };
842                match part_name.as_deref() {
843                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
844                        let mut args = f.args;
845                        let value = args.remove(1);
846                        let cast_expr = Expression::Cast(Box::new(Cast {
847                            this: value,
848                            to: DataType::Timestamp {
849                                precision: None,
850                                timezone: false,
851                            },
852                            trailing_comments: Vec::new(),
853                            double_colon_syntax: false,
854                            format: None,
855                            default: None,
856                            inferred_type: None,
857                        }));
858                        Ok(Expression::Function(Box::new(Function::new(
859                            "TO_UNIXTIME".to_string(),
860                            vec![cast_expr],
861                        ))))
862                    }
863                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
864                        let mut args = f.args;
865                        let value = args.remove(1);
866                        let cast_expr = Expression::Cast(Box::new(Cast {
867                            this: value,
868                            to: DataType::Timestamp {
869                                precision: None,
870                                timezone: false,
871                            },
872                            trailing_comments: Vec::new(),
873                            double_colon_syntax: false,
874                            format: None,
875                            default: None,
876                            inferred_type: None,
877                        }));
878                        let unixtime = Expression::Function(Box::new(Function::new(
879                            "TO_UNIXTIME".to_string(),
880                            vec![cast_expr],
881                        )));
882                        Ok(Expression::Mul(Box::new(BinaryOp {
883                            left: unixtime,
884                            right: Expression::Literal(Literal::Number("1000".to_string())),
885                            left_comments: Vec::new(),
886                            operator_comments: Vec::new(),
887                            trailing_comments: Vec::new(),
888                            inferred_type: None,
889                        })))
890                    }
891                    _ => Ok(Expression::Function(Box::new(f))),
892                }
893            }
894
895            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
896            "REPLACE" if f.args.len() == 2 => {
897                let mut args = f.args;
898                args.push(Expression::string(""));
899                Ok(Expression::Function(Box::new(Function::new(
900                    "REPLACE".to_string(),
901                    args,
902                ))))
903            }
904
905            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
906            "REGEXP_REPLACE" if f.args.len() == 2 => {
907                let mut args = f.args;
908                args.push(Expression::string(""));
909                Ok(Expression::Function(Box::new(Function::new(
910                    "REGEXP_REPLACE".to_string(),
911                    args,
912                ))))
913            }
914
915            // Pass through everything else
916            _ => Ok(Expression::Function(Box::new(f))),
917        }
918    }
919
920    fn transform_aggregate_function(
921        &self,
922        f: Box<crate::expressions::AggregateFunction>,
923    ) -> Result<Expression> {
924        let name_upper = f.name.to_uppercase();
925        match name_upper.as_str() {
926            // COUNT_IF -> SUM(CASE WHEN...)
927            "COUNT_IF" if !f.args.is_empty() => {
928                let condition = f.args.into_iter().next().unwrap();
929                let case_expr = Expression::Case(Box::new(Case {
930                    operand: None,
931                    whens: vec![(condition, Expression::number(1))],
932                    else_: Some(Expression::number(0)),
933                    comments: Vec::new(),
934                    inferred_type: None,
935                }));
936                Ok(Expression::Sum(Box::new(AggFunc {
937                    ignore_nulls: None,
938                    having_max: None,
939                    this: case_expr,
940                    distinct: f.distinct,
941                    filter: f.filter,
942                    order_by: Vec::new(),
943                    name: None,
944                    limit: None,
945                    inferred_type: None,
946                })))
947            }
948
949            // ANY_VALUE -> ARBITRARY in Presto
950            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
951                "ARBITRARY".to_string(),
952                f.args,
953            )))),
954
955            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
956            "GROUP_CONCAT" if !f.args.is_empty() => {
957                let mut args = f.args;
958                let first = args.remove(0);
959                let separator = args.pop();
960                let array_agg = Expression::Function(Box::new(Function::new(
961                    "ARRAY_AGG".to_string(),
962                    vec![first],
963                )));
964                let mut join_args = vec![array_agg];
965                if let Some(sep) = separator {
966                    join_args.push(sep);
967                }
968                Ok(Expression::Function(Box::new(Function::new(
969                    "ARRAY_JOIN".to_string(),
970                    join_args,
971                ))))
972            }
973
974            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
975            "STRING_AGG" if !f.args.is_empty() => {
976                let mut args = f.args;
977                let first = args.remove(0);
978                let separator = args.pop();
979                let array_agg = Expression::Function(Box::new(Function::new(
980                    "ARRAY_AGG".to_string(),
981                    vec![first],
982                )));
983                let mut join_args = vec![array_agg];
984                if let Some(sep) = separator {
985                    join_args.push(sep);
986                }
987                Ok(Expression::Function(Box::new(Function::new(
988                    "ARRAY_JOIN".to_string(),
989                    join_args,
990                ))))
991            }
992
993            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
994            "LISTAGG" if !f.args.is_empty() => {
995                let mut args = f.args;
996                let first = args.remove(0);
997                let separator = args.pop();
998                let array_agg = Expression::Function(Box::new(Function::new(
999                    "ARRAY_AGG".to_string(),
1000                    vec![first],
1001                )));
1002                let mut join_args = vec![array_agg];
1003                if let Some(sep) = separator {
1004                    join_args.push(sep);
1005                }
1006                Ok(Expression::Function(Box::new(Function::new(
1007                    "ARRAY_JOIN".to_string(),
1008                    join_args,
1009                ))))
1010            }
1011
1012            // VAR -> VAR_POP in Presto
1013            "VAR" if !f.args.is_empty() => {
1014                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1015                    name: "VAR_POP".to_string(),
1016                    args: f.args,
1017                    distinct: f.distinct,
1018                    filter: f.filter,
1019                    order_by: Vec::new(),
1020                    limit: None,
1021                    ignore_nulls: None,
1022                    inferred_type: None,
1023                })))
1024            }
1025
1026            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1027            "VARIANCE" if !f.args.is_empty() => {
1028                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1029                    name: "VAR_SAMP".to_string(),
1030                    args: f.args,
1031                    distinct: f.distinct,
1032                    filter: f.filter,
1033                    order_by: Vec::new(),
1034                    limit: None,
1035                    ignore_nulls: None,
1036                    inferred_type: None,
1037                })))
1038            }
1039
1040            // Pass through everything else
1041            _ => Ok(Expression::AggregateFunction(f)),
1042        }
1043    }
1044
1045    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1046        // Presto type mappings are handled in the generator
1047        Ok(Expression::Cast(Box::new(c)))
1048    }
1049}