Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Presto dialect.
///
/// Stateless unit type: all Presto-specific behaviour lives in its
/// `DialectImpl` implementation and in the associated helper functions
/// defined on this type.
pub struct PrestoDialect;
17
18impl DialectImpl for PrestoDialect {
    /// Identifies this implementation as the Presto dialect.
    fn dialect_type(&self) -> DialectType {
        DialectType::Presto
    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Presto uses double quotes for identifiers
26        config.identifiers.insert('"', '"');
27        // Presto does NOT support nested comments
28        config.nested_comments = false;
29        // Presto does NOT support QUALIFY - it's a valid identifier
30        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
31        config.keywords.remove("QUALIFY");
32        config
33    }
34
35    fn generator_config(&self) -> GeneratorConfig {
36        use crate::generator::IdentifierQuoteStyle;
37        GeneratorConfig {
38            identifier_quote: '"',
39            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
40            dialect: Some(DialectType::Presto),
41            limit_only_literals: true,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Presto
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
51                original_name: None,
52                expressions: vec![f.this, f.expression],
53            }))),
54
55            // NVL -> COALESCE in Presto
56            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
57                original_name: None,
58                expressions: vec![f.this, f.expression],
59            }))),
60
61            // TryCast stays as TryCast (Presto supports TRY_CAST)
62            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
63
64            // SafeCast -> TRY_CAST in Presto
65            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
66
67            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
68            Expression::ILike(op) => {
69                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
70                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
71                Ok(Expression::Like(Box::new(LikeOp {
72                    left: lower_left,
73                    right: lower_right,
74                    escape: op.escape,
75                    quantifier: op.quantifier.clone(),
76                })))
77            }
78
79            // CountIf is native in Presto (keep as-is)
80            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
81
82            // EXPLODE -> UNNEST in Presto
83            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
84                crate::expressions::UnnestFunc {
85                    this: f.this,
86                    expressions: Vec::new(),
87                    with_ordinality: false,
88                    alias: None,
89                    offset_alias: None,
90                },
91            ))),
92
93            // ExplodeOuter -> UNNEST in Presto
94            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
95                crate::expressions::UnnestFunc {
96                    this: f.this,
97                    expressions: Vec::new(),
98                    with_ordinality: false,
99                    alias: None,
100                    offset_alias: None,
101                },
102            ))),
103
104            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
105            Expression::StringAgg(f) => {
106                let array_agg = Expression::Function(Box::new(Function::new(
107                    "ARRAY_AGG".to_string(),
108                    vec![f.this.clone()],
109                )));
110                let mut join_args = vec![array_agg];
111                if let Some(sep) = f.separator {
112                    join_args.push(sep);
113                }
114                Ok(Expression::Function(Box::new(Function::new(
115                    "ARRAY_JOIN".to_string(),
116                    join_args,
117                ))))
118            }
119
120            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
121            Expression::GroupConcat(f) => {
122                let array_agg = Expression::Function(Box::new(Function::new(
123                    "ARRAY_AGG".to_string(),
124                    vec![f.this.clone()],
125                )));
126                let mut join_args = vec![array_agg];
127                if let Some(sep) = f.separator {
128                    join_args.push(sep);
129                }
130                Ok(Expression::Function(Box::new(Function::new(
131                    "ARRAY_JOIN".to_string(),
132                    join_args,
133                ))))
134            }
135
136            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
137            Expression::ListAgg(f) => {
138                let array_agg = Expression::Function(Box::new(Function::new(
139                    "ARRAY_AGG".to_string(),
140                    vec![f.this.clone()],
141                )));
142                let mut join_args = vec![array_agg];
143                if let Some(sep) = f.separator {
144                    join_args.push(sep);
145                }
146                Ok(Expression::Function(Box::new(Function::new(
147                    "ARRAY_JOIN".to_string(),
148                    join_args,
149                ))))
150            }
151
152            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
153
154            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
155            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
156                let path = match *e.expression {
157                    Expression::Literal(Literal::String(s)) => {
158                        let normalized = if s.starts_with('$') {
159                            s
160                        } else if s.starts_with('[') {
161                            format!("${}", s)
162                        } else {
163                            format!("$.{}", s)
164                        };
165                        Expression::Literal(Literal::String(normalized))
166                    }
167                    other => other,
168                };
169                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
170                    this: *e.this,
171                    path,
172                    returning: None,
173                    arrow_syntax: false,
174                    hash_arrow_syntax: false,
175                    wrapper_option: None,
176                    quotes_option: None,
177                    on_scalar_string: false,
178                    on_error: None,
179                })))
180            }
181
182            // Generic function transformations
183            Expression::Function(f) => self.transform_function(*f),
184
185            // Generic aggregate function transformations
186            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
187
188            // Cast transformations
189            Expression::Cast(c) => self.transform_cast(*c),
190
191            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
192            // to ensure float division (only when left isn't already a float cast)
193            Expression::Div(mut op) => {
194                if !Self::is_float_cast(&op.left) {
195                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
196                        this: op.left,
197                        to: DataType::Double {
198                            precision: None,
199                            scale: None,
200                        },
201                        trailing_comments: Vec::new(),
202                        double_colon_syntax: false,
203                        format: None,
204                        default: None,
205                    }));
206                }
207                Ok(Expression::Div(op))
208            }
209
210            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
211            Expression::IntDiv(f) => {
212                let cast_x = Expression::Cast(Box::new(Cast {
213                    this: f.this,
214                    to: crate::expressions::DataType::Double {
215                        precision: None,
216                        scale: None,
217                    },
218                    trailing_comments: Vec::new(),
219                    double_colon_syntax: false,
220                    format: None,
221                    default: None,
222                }));
223                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
224                Ok(Expression::Cast(Box::new(Cast {
225                    this: div_expr,
226                    to: crate::expressions::DataType::Int {
227                        length: None,
228                        integer_spelling: true,
229                    },
230                    trailing_comments: Vec::new(),
231                    double_colon_syntax: false,
232                    format: None,
233                    default: None,
234                })))
235            }
236
237            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
238            Expression::Delete(mut d) => {
239                if d.alias.is_some() {
240                    d.alias = None;
241                    d.alias_explicit_as = false;
242                    // Unqualify all columns in the WHERE clause
243                    if let Some(ref mut where_clause) = d.where_clause {
244                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
245                    }
246                }
247                Ok(Expression::Delete(d))
248            }
249
250            // Pass through everything else
251            _ => Ok(expr),
252        }
253    }
254}
255
256impl PrestoDialect {
257    /// Recursively unqualify columns - remove table qualifiers from Column references
258    fn unqualify_columns(expr: Expression) -> Expression {
259        match expr {
260            Expression::Column(c) => {
261                if c.table.is_some() {
262                    Expression::Column(Column {
263                        name: c.name,
264                        table: None,
265                        join_mark: c.join_mark,
266                        trailing_comments: c.trailing_comments,
267                        span: None,
268                    })
269                } else {
270                    Expression::Column(c)
271                }
272            }
273            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
274            Expression::Dot(d) => Expression::Column(Column {
275                name: d.field,
276                table: None,
277                join_mark: false,
278                trailing_comments: Vec::new(),
279                span: None,
280            }),
281            // Recursively walk common binary expression types
282            Expression::And(mut op) => {
283                op.left = Self::unqualify_columns(op.left);
284                op.right = Self::unqualify_columns(op.right);
285                Expression::And(op)
286            }
287            Expression::Or(mut op) => {
288                op.left = Self::unqualify_columns(op.left);
289                op.right = Self::unqualify_columns(op.right);
290                Expression::Or(op)
291            }
292            Expression::Eq(mut op) => {
293                op.left = Self::unqualify_columns(op.left);
294                op.right = Self::unqualify_columns(op.right);
295                Expression::Eq(op)
296            }
297            Expression::Neq(mut op) => {
298                op.left = Self::unqualify_columns(op.left);
299                op.right = Self::unqualify_columns(op.right);
300                Expression::Neq(op)
301            }
302            Expression::Gt(mut op) => {
303                op.left = Self::unqualify_columns(op.left);
304                op.right = Self::unqualify_columns(op.right);
305                Expression::Gt(op)
306            }
307            Expression::Lt(mut op) => {
308                op.left = Self::unqualify_columns(op.left);
309                op.right = Self::unqualify_columns(op.right);
310                Expression::Lt(op)
311            }
312            Expression::Gte(mut op) => {
313                op.left = Self::unqualify_columns(op.left);
314                op.right = Self::unqualify_columns(op.right);
315                Expression::Gte(op)
316            }
317            Expression::Lte(mut op) => {
318                op.left = Self::unqualify_columns(op.left);
319                op.right = Self::unqualify_columns(op.right);
320                Expression::Lte(op)
321            }
322            // Unary operators
323            Expression::Not(mut e) => {
324                e.this = Self::unqualify_columns(e.this);
325                Expression::Not(e)
326            }
327            // Predicates
328            Expression::In(mut i) => {
329                i.this = Self::unqualify_columns(i.this);
330                i.expressions = i
331                    .expressions
332                    .into_iter()
333                    .map(Self::unqualify_columns)
334                    .collect();
335                // Also recurse into subquery if present
336                if let Some(q) = i.query {
337                    i.query = Some(Self::unqualify_columns(q));
338                }
339                Expression::In(i)
340            }
341            Expression::IsNull(mut f) => {
342                f.this = Self::unqualify_columns(f.this);
343                Expression::IsNull(f)
344            }
345            Expression::Paren(mut p) => {
346                p.this = Self::unqualify_columns(p.this);
347                Expression::Paren(p)
348            }
349            Expression::Function(mut f) => {
350                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
351                Expression::Function(f)
352            }
353            // For subqueries (SELECT statements inside IN, etc), also unqualify
354            Expression::Select(mut s) => {
355                s.expressions = s
356                    .expressions
357                    .into_iter()
358                    .map(Self::unqualify_columns)
359                    .collect();
360                if let Some(ref mut w) = s.where_clause {
361                    w.this = Self::unqualify_columns(w.this.clone());
362                }
363                Expression::Select(s)
364            }
365            Expression::Subquery(mut sq) => {
366                sq.this = Self::unqualify_columns(sq.this);
367                Expression::Subquery(sq)
368            }
369            Expression::Alias(mut a) => {
370                a.this = Self::unqualify_columns(a.this);
371                Expression::Alias(a)
372            }
373            // Pass through other expressions unchanged
374            other => other,
375        }
376    }
377
378    /// Check if an expression is already a CAST to a float type
379    fn is_float_cast(expr: &Expression) -> bool {
380        if let Expression::Cast(cast) = expr {
381            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
382        } else {
383            false
384        }
385    }
386
387    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
388    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
389    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
390    pub fn oracle_to_presto_format(fmt: &str) -> String {
391        // Process character by character to avoid double-replacement issues
392        let chars: Vec<char> = fmt.chars().collect();
393        let mut result = String::new();
394        let mut i = 0;
395        while i < chars.len() {
396            let remaining = &fmt[i..];
397            if remaining.starts_with("yyyy") {
398                result.push_str("%Y");
399                i += 4;
400            } else if remaining.starts_with("yy") {
401                result.push_str("%y");
402                i += 2;
403            } else if remaining.starts_with("hh24") {
404                result.push_str("%H");
405                i += 4;
406            } else if remaining.starts_with("hh") {
407                result.push_str("%H");
408                i += 2;
409            } else if remaining.starts_with("mi") {
410                result.push_str("%i");
411                i += 2;
412            } else if remaining.starts_with("mm") {
413                result.push_str("%m");
414                i += 2;
415            } else if remaining.starts_with("dd") {
416                result.push_str("%d");
417                i += 2;
418            } else if remaining.starts_with("ss") {
419                result.push_str("%s");
420                i += 2;
421            } else {
422                result.push(chars[i]);
423                i += 1;
424            }
425        }
426        result
427    }
428
429    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
430    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
431    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
432    pub fn presto_to_java_format(fmt: &str) -> String {
433        fmt.replace("%Y", "yyyy")
434            .replace("%m", "MM")
435            .replace("%d", "dd")
436            .replace("%H", "HH")
437            .replace("%i", "mm")
438            .replace("%S", "ss")
439            .replace("%s", "ss")
440            .replace("%y", "yy")
441            .replace("%T", "HH:mm:ss")
442            .replace("%F", "yyyy-MM-dd")
443            .replace("%M", "MMMM")
444    }
445
446    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
447    pub fn normalize_presto_format(fmt: &str) -> String {
448        fmt.replace("%H:%i:%S", "%T").replace("%H:%i:%s", "%T")
449    }
450
451    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
452    pub fn presto_to_duckdb_format(fmt: &str) -> String {
453        fmt.replace("%i", "%M")
454            .replace("%s", "%S")
455            .replace("%T", "%H:%M:%S")
456    }
457
458    /// Convert Presto's C-style format to BigQuery format
459    pub fn presto_to_bigquery_format(fmt: &str) -> String {
460        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
461        // BigQuery uses %M for minutes (like DuckDB), not %i
462        let result = fmt
463            .replace("%Y-%m-%d", "%F")
464            .replace("%H:%i:%S", "%T")
465            .replace("%H:%i:%s", "%T")
466            .replace("%i", "%M")
467            .replace("%s", "%S");
468        result
469    }
470
471    /// Check if a Presto format string matches the default timestamp format
472    pub fn is_default_timestamp_format(fmt: &str) -> bool {
473        let normalized = Self::normalize_presto_format(fmt);
474        normalized == "%Y-%m-%d %T"
475            || normalized == "%Y-%m-%d %H:%i:%S"
476            || fmt == "%Y-%m-%d %H:%i:%S"
477            || fmt == "%Y-%m-%d %T"
478    }
479
480    /// Check if a Presto format string matches the default date format
481    pub fn is_default_date_format(fmt: &str) -> bool {
482        fmt == "%Y-%m-%d" || fmt == "%F"
483    }
484
485    fn transform_function(&self, f: Function) -> Result<Expression> {
486        let name_upper = f.name.to_uppercase();
487        match name_upper.as_str() {
488            // IFNULL -> COALESCE
489            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
490                original_name: None,
491                expressions: f.args,
492            }))),
493
494            // NVL -> COALESCE
495            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
496                original_name: None,
497                expressions: f.args,
498            }))),
499
500            // ISNULL -> COALESCE
501            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
502                original_name: None,
503                expressions: f.args,
504            }))),
505
506            // GETDATE -> CURRENT_TIMESTAMP
507            "GETDATE" => Ok(Expression::CurrentTimestamp(
508                crate::expressions::CurrentTimestamp {
509                    precision: None,
510                    sysdate: false,
511                },
512            )),
513
514            // NOW -> CURRENT_TIMESTAMP
515            "NOW" => Ok(Expression::CurrentTimestamp(
516                crate::expressions::CurrentTimestamp {
517                    precision: None,
518                    sysdate: false,
519                },
520            )),
521
522            // RAND -> RANDOM in Presto (but it's actually RANDOM())
523            "RAND" => Ok(Expression::Function(Box::new(Function::new(
524                "RANDOM".to_string(),
525                vec![],
526            )))),
527
528            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
529            "GROUP_CONCAT" if !f.args.is_empty() => {
530                let mut args = f.args;
531                let first = args.remove(0);
532                let separator = args.pop();
533                let array_agg = Expression::Function(Box::new(Function::new(
534                    "ARRAY_AGG".to_string(),
535                    vec![first],
536                )));
537                let mut join_args = vec![array_agg];
538                if let Some(sep) = separator {
539                    join_args.push(sep);
540                }
541                Ok(Expression::Function(Box::new(Function::new(
542                    "ARRAY_JOIN".to_string(),
543                    join_args,
544                ))))
545            }
546
547            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
548            "STRING_AGG" if !f.args.is_empty() => {
549                let mut args = f.args;
550                let first = args.remove(0);
551                let separator = args.pop();
552                let array_agg = Expression::Function(Box::new(Function::new(
553                    "ARRAY_AGG".to_string(),
554                    vec![first],
555                )));
556                let mut join_args = vec![array_agg];
557                if let Some(sep) = separator {
558                    join_args.push(sep);
559                }
560                Ok(Expression::Function(Box::new(Function::new(
561                    "ARRAY_JOIN".to_string(),
562                    join_args,
563                ))))
564            }
565
566            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
567            "LISTAGG" if !f.args.is_empty() => {
568                let mut args = f.args;
569                let first = args.remove(0);
570                let separator = args.pop();
571                let array_agg = Expression::Function(Box::new(Function::new(
572                    "ARRAY_AGG".to_string(),
573                    vec![first],
574                )));
575                let mut join_args = vec![array_agg];
576                if let Some(sep) = separator {
577                    join_args.push(sep);
578                }
579                Ok(Expression::Function(Box::new(Function::new(
580                    "ARRAY_JOIN".to_string(),
581                    join_args,
582                ))))
583            }
584
585            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
586            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
587
588            // LEN -> LENGTH
589            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
590                f.args.into_iter().next().unwrap(),
591            )))),
592
593            // CHARINDEX -> STRPOS in Presto (with swapped args)
594            "CHARINDEX" if f.args.len() >= 2 => {
595                let mut args = f.args;
596                let substring = args.remove(0);
597                let string = args.remove(0);
598                // STRPOS(string, substring) - note: argument order is reversed
599                Ok(Expression::Function(Box::new(Function::new(
600                    "STRPOS".to_string(),
601                    vec![string, substring],
602                ))))
603            }
604
605            // INSTR -> STRPOS (with same argument order)
606            "INSTR" if f.args.len() >= 2 => {
607                let args = f.args;
608                // INSTR(string, substring) -> STRPOS(string, substring)
609                Ok(Expression::Function(Box::new(Function::new(
610                    "STRPOS".to_string(),
611                    args,
612                ))))
613            }
614
615            // LOCATE -> STRPOS in Presto (with swapped args)
616            "LOCATE" if f.args.len() >= 2 => {
617                let mut args = f.args;
618                let substring = args.remove(0);
619                let string = args.remove(0);
620                // LOCATE(substring, string) -> STRPOS(string, substring)
621                Ok(Expression::Function(Box::new(Function::new(
622                    "STRPOS".to_string(),
623                    vec![string, substring],
624                ))))
625            }
626
627            // ARRAY_LENGTH -> CARDINALITY in Presto
628            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
629                Function::new("CARDINALITY".to_string(), f.args),
630            ))),
631
632            // SIZE -> CARDINALITY in Presto
633            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
634                "CARDINALITY".to_string(),
635                f.args,
636            )))),
637
638            // ARRAY_CONTAINS -> CONTAINS in Presto
639            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
640                Function::new("CONTAINS".to_string(), f.args),
641            ))),
642
643            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
644            "TO_DATE" if !f.args.is_empty() => {
645                if f.args.len() == 1 {
646                    // Simple case: just cast to DATE
647                    Ok(Expression::Cast(Box::new(Cast {
648                        this: f.args.into_iter().next().unwrap(),
649                        to: DataType::Date,
650                        trailing_comments: Vec::new(),
651                        double_colon_syntax: false,
652                        format: None,
653                        default: None,
654                    })))
655                } else {
656                    // With format: use DATE_PARSE
657                    Ok(Expression::Function(Box::new(Function::new(
658                        "DATE_PARSE".to_string(),
659                        f.args,
660                    ))))
661                }
662            }
663
664            // TO_TIMESTAMP -> DATE_PARSE / CAST
665            "TO_TIMESTAMP" if !f.args.is_empty() => {
666                if f.args.len() == 1 {
667                    Ok(Expression::Cast(Box::new(Cast {
668                        this: f.args.into_iter().next().unwrap(),
669                        to: DataType::Timestamp {
670                            precision: None,
671                            timezone: false,
672                        },
673                        trailing_comments: Vec::new(),
674                        double_colon_syntax: false,
675                        format: None,
676                        default: None,
677                    })))
678                } else {
679                    Ok(Expression::Function(Box::new(Function::new(
680                        "DATE_PARSE".to_string(),
681                        f.args,
682                    ))))
683                }
684            }
685
686            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
687            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
688
689            // strftime -> DATE_FORMAT in Presto
690            "STRFTIME" if f.args.len() >= 2 => {
691                let mut args = f.args;
692                // strftime(format, date) -> DATE_FORMAT(date, format)
693                let format = args.remove(0);
694                let date = args.remove(0);
695                Ok(Expression::Function(Box::new(Function::new(
696                    "DATE_FORMAT".to_string(),
697                    vec![date, format],
698                ))))
699            }
700
701            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
702            "TO_CHAR" if f.args.len() >= 2 => {
703                let mut args = f.args;
704                // Convert Oracle-style format string to Presto C-style
705                if let Expression::Literal(Literal::String(ref s)) = args[1] {
706                    let converted = Self::oracle_to_presto_format(s);
707                    args[1] = Expression::Literal(Literal::String(converted));
708                }
709                Ok(Expression::Function(Box::new(Function::new(
710                    "DATE_FORMAT".to_string(),
711                    args,
712                ))))
713            }
714
715            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
716            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
717                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
718            ))),
719
720            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
721            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
722
723            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
724            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
725
726            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
727            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
728
729            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
730            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
731                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
732            ))),
733
734            // COLLECT_LIST -> ARRAY_AGG
735            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
736                Function::new("ARRAY_AGG".to_string(), f.args),
737            ))),
738
739            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
740            "COLLECT_SET" if !f.args.is_empty() => {
741                let array_agg =
742                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
743                Ok(Expression::Function(Box::new(Function::new(
744                    "ARRAY_DISTINCT".to_string(),
745                    vec![array_agg],
746                ))))
747            }
748
749            // RLIKE -> REGEXP_LIKE in Presto
750            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
751                "REGEXP_LIKE".to_string(),
752                f.args,
753            )))),
754
755            // REGEXP -> REGEXP_LIKE in Presto
756            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
757                "REGEXP_LIKE".to_string(),
758                f.args,
759            )))),
760
761            // PARSE_JSON -> JSON_PARSE in Presto
762            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
763                "JSON_PARSE".to_string(),
764                f.args,
765            )))),
766
767            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
768            "GET_PATH" if f.args.len() == 2 => {
769                let mut args = f.args;
770                let this = args.remove(0);
771                let path = args.remove(0);
772                let json_path = match &path {
773                    Expression::Literal(Literal::String(s)) => {
774                        let normalized = if s.starts_with('$') {
775                            s.clone()
776                        } else if s.starts_with('[') {
777                            format!("${}", s)
778                        } else {
779                            format!("$.{}", s)
780                        };
781                        Expression::Literal(Literal::String(normalized))
782                    }
783                    _ => path,
784                };
785                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
786                    this,
787                    path: json_path,
788                    returning: None,
789                    arrow_syntax: false,
790                    hash_arrow_syntax: false,
791                    wrapper_option: None,
792                    quotes_option: None,
793                    on_scalar_string: false,
794                    on_error: None,
795                })))
796            }
797
798            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
799            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
800                let mut args = f.args;
801                let subject = args.remove(0);
802                let pattern = args.remove(0);
803                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
804                if args.len() >= 4 {
805                    let _pos = args.remove(0);
806                    let _occ = args.remove(0);
807                    let _params = args.remove(0);
808                    let group = args.remove(0);
809                    Ok(Expression::Function(Box::new(Function::new(
810                        "REGEXP_EXTRACT".to_string(),
811                        vec![subject, pattern, group],
812                    ))))
813                } else {
814                    Ok(Expression::Function(Box::new(Function::new(
815                        "REGEXP_EXTRACT".to_string(),
816                        vec![subject, pattern],
817                    ))))
818                }
819            }
820
821            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
822            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
823            "DATE_PART" if f.args.len() == 2 => {
824                let part_name = match &f.args[0] {
825                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
826                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
827                    _ => None,
828                };
829                match part_name.as_deref() {
830                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
831                        let mut args = f.args;
832                        let value = args.remove(1);
833                        let cast_expr = Expression::Cast(Box::new(Cast {
834                            this: value,
835                            to: DataType::Timestamp {
836                                precision: None,
837                                timezone: false,
838                            },
839                            trailing_comments: Vec::new(),
840                            double_colon_syntax: false,
841                            format: None,
842                            default: None,
843                        }));
844                        Ok(Expression::Function(Box::new(Function::new(
845                            "TO_UNIXTIME".to_string(),
846                            vec![cast_expr],
847                        ))))
848                    }
849                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
850                        let mut args = f.args;
851                        let value = args.remove(1);
852                        let cast_expr = Expression::Cast(Box::new(Cast {
853                            this: value,
854                            to: DataType::Timestamp {
855                                precision: None,
856                                timezone: false,
857                            },
858                            trailing_comments: Vec::new(),
859                            double_colon_syntax: false,
860                            format: None,
861                            default: None,
862                        }));
863                        let unixtime = Expression::Function(Box::new(Function::new(
864                            "TO_UNIXTIME".to_string(),
865                            vec![cast_expr],
866                        )));
867                        Ok(Expression::Mul(Box::new(BinaryOp {
868                            left: unixtime,
869                            right: Expression::Literal(Literal::Number("1000".to_string())),
870                            left_comments: Vec::new(),
871                            operator_comments: Vec::new(),
872                            trailing_comments: Vec::new(),
873                        })))
874                    }
875                    _ => Ok(Expression::Function(Box::new(f))),
876                }
877            }
878
879            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
880            "REPLACE" if f.args.len() == 2 => {
881                let mut args = f.args;
882                args.push(Expression::string(""));
883                Ok(Expression::Function(Box::new(Function::new(
884                    "REPLACE".to_string(),
885                    args,
886                ))))
887            }
888
889            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
890            "REGEXP_REPLACE" if f.args.len() == 2 => {
891                let mut args = f.args;
892                args.push(Expression::string(""));
893                Ok(Expression::Function(Box::new(Function::new(
894                    "REGEXP_REPLACE".to_string(),
895                    args,
896                ))))
897            }
898
899            // Pass through everything else
900            _ => Ok(Expression::Function(Box::new(f))),
901        }
902    }
903
904    fn transform_aggregate_function(
905        &self,
906        f: Box<crate::expressions::AggregateFunction>,
907    ) -> Result<Expression> {
908        let name_upper = f.name.to_uppercase();
909        match name_upper.as_str() {
910            // COUNT_IF -> SUM(CASE WHEN...)
911            "COUNT_IF" if !f.args.is_empty() => {
912                let condition = f.args.into_iter().next().unwrap();
913                let case_expr = Expression::Case(Box::new(Case {
914                    operand: None,
915                    whens: vec![(condition, Expression::number(1))],
916                    else_: Some(Expression::number(0)),
917                    comments: Vec::new(),
918                }));
919                Ok(Expression::Sum(Box::new(AggFunc {
920                    ignore_nulls: None,
921                    having_max: None,
922                    this: case_expr,
923                    distinct: f.distinct,
924                    filter: f.filter,
925                    order_by: Vec::new(),
926                    name: None,
927                    limit: None,
928                })))
929            }
930
931            // ANY_VALUE -> ARBITRARY in Presto
932            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
933                "ARBITRARY".to_string(),
934                f.args,
935            )))),
936
937            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
938            "GROUP_CONCAT" if !f.args.is_empty() => {
939                let mut args = f.args;
940                let first = args.remove(0);
941                let separator = args.pop();
942                let array_agg = Expression::Function(Box::new(Function::new(
943                    "ARRAY_AGG".to_string(),
944                    vec![first],
945                )));
946                let mut join_args = vec![array_agg];
947                if let Some(sep) = separator {
948                    join_args.push(sep);
949                }
950                Ok(Expression::Function(Box::new(Function::new(
951                    "ARRAY_JOIN".to_string(),
952                    join_args,
953                ))))
954            }
955
956            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
957            "STRING_AGG" if !f.args.is_empty() => {
958                let mut args = f.args;
959                let first = args.remove(0);
960                let separator = args.pop();
961                let array_agg = Expression::Function(Box::new(Function::new(
962                    "ARRAY_AGG".to_string(),
963                    vec![first],
964                )));
965                let mut join_args = vec![array_agg];
966                if let Some(sep) = separator {
967                    join_args.push(sep);
968                }
969                Ok(Expression::Function(Box::new(Function::new(
970                    "ARRAY_JOIN".to_string(),
971                    join_args,
972                ))))
973            }
974
975            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
976            "LISTAGG" if !f.args.is_empty() => {
977                let mut args = f.args;
978                let first = args.remove(0);
979                let separator = args.pop();
980                let array_agg = Expression::Function(Box::new(Function::new(
981                    "ARRAY_AGG".to_string(),
982                    vec![first],
983                )));
984                let mut join_args = vec![array_agg];
985                if let Some(sep) = separator {
986                    join_args.push(sep);
987                }
988                Ok(Expression::Function(Box::new(Function::new(
989                    "ARRAY_JOIN".to_string(),
990                    join_args,
991                ))))
992            }
993
994            // VAR -> VAR_POP in Presto
995            "VAR" if !f.args.is_empty() => {
996                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
997                    name: "VAR_POP".to_string(),
998                    args: f.args,
999                    distinct: f.distinct,
1000                    filter: f.filter,
1001                    order_by: Vec::new(),
1002                    limit: None,
1003                    ignore_nulls: None,
1004                })))
1005            }
1006
1007            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1008            "VARIANCE" if !f.args.is_empty() => {
1009                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1010                    name: "VAR_SAMP".to_string(),
1011                    args: f.args,
1012                    distinct: f.distinct,
1013                    filter: f.filter,
1014                    order_by: Vec::new(),
1015                    limit: None,
1016                    ignore_nulls: None,
1017                })))
1018            }
1019
1020            // Pass through everything else
1021            _ => Ok(Expression::AggregateFunction(f)),
1022        }
1023    }
1024
1025    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1026        // Presto type mappings are handled in the generator
1027        Ok(Expression::Cast(Box::new(c)))
1028    }
1029}