Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Presto dialect.
///
/// Unit struct carrying no state; its behavior lives in the `DialectImpl`
/// implementation and the inherent helper methods defined in this file.
pub struct PrestoDialect;
17
18impl DialectImpl for PrestoDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Presto
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Presto uses double quotes for identifiers
26        config.identifiers.insert('"', '"');
27        // Presto does NOT support nested comments
28        config.nested_comments = false;
29        // Presto does NOT support QUALIFY - it's a valid identifier
30        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
31        config.keywords.remove("QUALIFY");
32        config
33    }
34
35    fn generator_config(&self) -> GeneratorConfig {
36        use crate::generator::IdentifierQuoteStyle;
37        GeneratorConfig {
38            identifier_quote: '"',
39            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
40            dialect: Some(DialectType::Presto),
41            limit_only_literals: true,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Presto
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
51                original_name: None,
52                expressions: vec![f.this, f.expression],
53                inferred_type: None,
54            }))),
55
56            // NVL -> COALESCE in Presto
57            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
58                original_name: None,
59                expressions: vec![f.this, f.expression],
60                inferred_type: None,
61            }))),
62
63            // TryCast stays as TryCast (Presto supports TRY_CAST)
64            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
65
66            // SafeCast -> TRY_CAST in Presto
67            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
68
69            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
70            Expression::ILike(op) => {
71                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
72                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
73                Ok(Expression::Like(Box::new(LikeOp {
74                    left: lower_left,
75                    right: lower_right,
76                    escape: op.escape,
77                    quantifier: op.quantifier.clone(),
78                    inferred_type: None,
79                })))
80            }
81
82            // CountIf is native in Presto (keep as-is)
83            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
84
85            // EXPLODE -> UNNEST in Presto
86            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
87                crate::expressions::UnnestFunc {
88                    this: f.this,
89                    expressions: Vec::new(),
90                    with_ordinality: false,
91                    alias: None,
92                    offset_alias: None,
93                },
94            ))),
95
96            // ExplodeOuter -> UNNEST in Presto
97            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
98                crate::expressions::UnnestFunc {
99                    this: f.this,
100                    expressions: Vec::new(),
101                    with_ordinality: false,
102                    alias: None,
103                    offset_alias: None,
104                },
105            ))),
106
107            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
108            Expression::StringAgg(f) => {
109                let array_agg = Expression::Function(Box::new(Function::new(
110                    "ARRAY_AGG".to_string(),
111                    vec![f.this.clone()],
112                )));
113                let mut join_args = vec![array_agg];
114                if let Some(sep) = f.separator {
115                    join_args.push(sep);
116                }
117                Ok(Expression::Function(Box::new(Function::new(
118                    "ARRAY_JOIN".to_string(),
119                    join_args,
120                ))))
121            }
122
123            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
124            Expression::GroupConcat(f) => {
125                let array_agg = Expression::Function(Box::new(Function::new(
126                    "ARRAY_AGG".to_string(),
127                    vec![f.this.clone()],
128                )));
129                let mut join_args = vec![array_agg];
130                if let Some(sep) = f.separator {
131                    join_args.push(sep);
132                }
133                Ok(Expression::Function(Box::new(Function::new(
134                    "ARRAY_JOIN".to_string(),
135                    join_args,
136                ))))
137            }
138
139            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
140            Expression::ListAgg(f) => {
141                let array_agg = Expression::Function(Box::new(Function::new(
142                    "ARRAY_AGG".to_string(),
143                    vec![f.this.clone()],
144                )));
145                let mut join_args = vec![array_agg];
146                if let Some(sep) = f.separator {
147                    join_args.push(sep);
148                }
149                Ok(Expression::Function(Box::new(Function::new(
150                    "ARRAY_JOIN".to_string(),
151                    join_args,
152                ))))
153            }
154
155            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
156
157            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
158            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
159                let path = match *e.expression {
160                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
161                        let Literal::String(s) = lit.as_ref() else {
162                            unreachable!()
163                        };
164                        let normalized = if s.starts_with('$') {
165                            s.clone()
166                        } else if s.starts_with('[') {
167                            format!("${}", s)
168                        } else {
169                            format!("$.{}", s)
170                        };
171                        Expression::Literal(Box::new(Literal::String(normalized)))
172                    }
173                    other => other,
174                };
175                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
176                    this: *e.this,
177                    path,
178                    returning: None,
179                    arrow_syntax: false,
180                    hash_arrow_syntax: false,
181                    wrapper_option: None,
182                    quotes_option: None,
183                    on_scalar_string: false,
184                    on_error: None,
185                })))
186            }
187
188            // Generic function transformations
189            Expression::Function(f) => self.transform_function(*f),
190
191            // Generic aggregate function transformations
192            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
193
194            // Cast transformations
195            Expression::Cast(c) => self.transform_cast(*c),
196
197            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
198            // to ensure float division (only when left isn't already a float cast)
199            Expression::Div(mut op) => {
200                if !Self::is_float_cast(&op.left) {
201                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
202                        this: op.left,
203                        to: DataType::Double {
204                            precision: None,
205                            scale: None,
206                        },
207                        trailing_comments: Vec::new(),
208                        double_colon_syntax: false,
209                        format: None,
210                        default: None,
211                        inferred_type: None,
212                    }));
213                }
214                Ok(Expression::Div(op))
215            }
216
217            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
218            Expression::IntDiv(f) => {
219                let cast_x = Expression::Cast(Box::new(Cast {
220                    this: f.this,
221                    to: crate::expressions::DataType::Double {
222                        precision: None,
223                        scale: None,
224                    },
225                    trailing_comments: Vec::new(),
226                    double_colon_syntax: false,
227                    format: None,
228                    default: None,
229                    inferred_type: None,
230                }));
231                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
232                Ok(Expression::Cast(Box::new(Cast {
233                    this: div_expr,
234                    to: crate::expressions::DataType::Int {
235                        length: None,
236                        integer_spelling: true,
237                    },
238                    trailing_comments: Vec::new(),
239                    double_colon_syntax: false,
240                    format: None,
241                    default: None,
242                    inferred_type: None,
243                })))
244            }
245
246            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
247            Expression::Delete(mut d) => {
248                if d.alias.is_some() {
249                    d.alias = None;
250                    d.alias_explicit_as = false;
251                    // Unqualify all columns in the WHERE clause
252                    if let Some(ref mut where_clause) = d.where_clause {
253                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
254                    }
255                }
256                Ok(Expression::Delete(d))
257            }
258
259            // Pass through everything else
260            _ => Ok(expr),
261        }
262    }
263}
264
265impl PrestoDialect {
266    /// Recursively unqualify columns - remove table qualifiers from Column references
267    fn unqualify_columns(expr: Expression) -> Expression {
268        match expr {
269            Expression::Column(c) => {
270                if c.table.is_some() {
271                    Expression::boxed_column(Column {
272                        name: c.name,
273                        table: None,
274                        join_mark: c.join_mark,
275                        trailing_comments: c.trailing_comments,
276                        span: None,
277                        inferred_type: None,
278                    })
279                } else {
280                    Expression::Column(c)
281                }
282            }
283            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name)
284            Expression::Dot(d) => Expression::boxed_column(Column {
285                name: d.field,
286                table: None,
287                join_mark: false,
288                trailing_comments: Vec::new(),
289                span: None,
290                inferred_type: None,
291            }),
292            // Recursively walk common binary expression types
293            Expression::And(mut op) => {
294                op.left = Self::unqualify_columns(op.left);
295                op.right = Self::unqualify_columns(op.right);
296                Expression::And(op)
297            }
298            Expression::Or(mut op) => {
299                op.left = Self::unqualify_columns(op.left);
300                op.right = Self::unqualify_columns(op.right);
301                Expression::Or(op)
302            }
303            Expression::Eq(mut op) => {
304                op.left = Self::unqualify_columns(op.left);
305                op.right = Self::unqualify_columns(op.right);
306                Expression::Eq(op)
307            }
308            Expression::Neq(mut op) => {
309                op.left = Self::unqualify_columns(op.left);
310                op.right = Self::unqualify_columns(op.right);
311                Expression::Neq(op)
312            }
313            Expression::Gt(mut op) => {
314                op.left = Self::unqualify_columns(op.left);
315                op.right = Self::unqualify_columns(op.right);
316                Expression::Gt(op)
317            }
318            Expression::Lt(mut op) => {
319                op.left = Self::unqualify_columns(op.left);
320                op.right = Self::unqualify_columns(op.right);
321                Expression::Lt(op)
322            }
323            Expression::Gte(mut op) => {
324                op.left = Self::unqualify_columns(op.left);
325                op.right = Self::unqualify_columns(op.right);
326                Expression::Gte(op)
327            }
328            Expression::Lte(mut op) => {
329                op.left = Self::unqualify_columns(op.left);
330                op.right = Self::unqualify_columns(op.right);
331                Expression::Lte(op)
332            }
333            // Unary operators
334            Expression::Not(mut e) => {
335                e.this = Self::unqualify_columns(e.this);
336                Expression::Not(e)
337            }
338            // Predicates
339            Expression::In(mut i) => {
340                i.this = Self::unqualify_columns(i.this);
341                i.expressions = i
342                    .expressions
343                    .into_iter()
344                    .map(Self::unqualify_columns)
345                    .collect();
346                // Also recurse into subquery if present
347                if let Some(q) = i.query {
348                    i.query = Some(Self::unqualify_columns(q));
349                }
350                Expression::In(i)
351            }
352            Expression::IsNull(mut f) => {
353                f.this = Self::unqualify_columns(f.this);
354                Expression::IsNull(f)
355            }
356            Expression::Paren(mut p) => {
357                p.this = Self::unqualify_columns(p.this);
358                Expression::Paren(p)
359            }
360            Expression::Function(mut f) => {
361                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
362                Expression::Function(f)
363            }
364            // For subqueries (SELECT statements inside IN, etc), also unqualify
365            Expression::Select(mut s) => {
366                s.expressions = s
367                    .expressions
368                    .into_iter()
369                    .map(Self::unqualify_columns)
370                    .collect();
371                if let Some(ref mut w) = s.where_clause {
372                    w.this = Self::unqualify_columns(w.this.clone());
373                }
374                Expression::Select(s)
375            }
376            Expression::Subquery(mut sq) => {
377                sq.this = Self::unqualify_columns(sq.this);
378                Expression::Subquery(sq)
379            }
380            Expression::Alias(mut a) => {
381                a.this = Self::unqualify_columns(a.this);
382                Expression::Alias(a)
383            }
384            // Pass through other expressions unchanged
385            other => other,
386        }
387    }
388
389    /// Check if an expression is already a CAST to a float type
390    fn is_float_cast(expr: &Expression) -> bool {
391        if let Expression::Cast(cast) = expr {
392            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
393        } else {
394            false
395        }
396    }
397
398    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
399    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
400    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
401    pub fn oracle_to_presto_format(fmt: &str) -> String {
402        // Process character by character to avoid double-replacement issues
403        let chars: Vec<char> = fmt.chars().collect();
404        let mut result = String::new();
405        let mut i = 0;
406        while i < chars.len() {
407            let remaining = &fmt[i..];
408            if remaining.starts_with("yyyy") {
409                result.push_str("%Y");
410                i += 4;
411            } else if remaining.starts_with("yy") {
412                result.push_str("%y");
413                i += 2;
414            } else if remaining.starts_with("hh24") {
415                result.push_str("%H");
416                i += 4;
417            } else if remaining.starts_with("hh") {
418                result.push_str("%H");
419                i += 2;
420            } else if remaining.starts_with("mi") {
421                result.push_str("%i");
422                i += 2;
423            } else if remaining.starts_with("mm") {
424                result.push_str("%m");
425                i += 2;
426            } else if remaining.starts_with("dd") {
427                result.push_str("%d");
428                i += 2;
429            } else if remaining.starts_with("ss") {
430                result.push_str("%s");
431                i += 2;
432            } else {
433                result.push(chars[i]);
434                i += 1;
435            }
436        }
437        result
438    }
439
440    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
441    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
442    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
443    pub fn presto_to_java_format(fmt: &str) -> String {
444        fmt.replace("%Y", "yyyy")
445            .replace("%m", "MM")
446            .replace("%d", "dd")
447            .replace("%H", "HH")
448            .replace("%i", "mm")
449            .replace("%S", "ss")
450            .replace("%s", "ss")
451            .replace("%y", "yy")
452            .replace("%T", "HH:mm:ss")
453            .replace("%F", "yyyy-MM-dd")
454            .replace("%M", "MMMM")
455    }
456
457    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
458    pub fn normalize_presto_format(fmt: &str) -> String {
459        fmt.replace("%H:%i:%S", "%T").replace("%H:%i:%s", "%T")
460    }
461
462    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
463    pub fn presto_to_duckdb_format(fmt: &str) -> String {
464        fmt.replace("%i", "%M")
465            .replace("%s", "%S")
466            .replace("%T", "%H:%M:%S")
467    }
468
469    /// Convert Presto's C-style format to BigQuery format
470    pub fn presto_to_bigquery_format(fmt: &str) -> String {
471        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
472        // BigQuery uses %M for minutes (like DuckDB), not %i
473        let result = fmt
474            .replace("%Y-%m-%d", "%F")
475            .replace("%H:%i:%S", "%T")
476            .replace("%H:%i:%s", "%T")
477            .replace("%i", "%M")
478            .replace("%s", "%S");
479        result
480    }
481
482    /// Check if a Presto format string matches the default timestamp format
483    pub fn is_default_timestamp_format(fmt: &str) -> bool {
484        let normalized = Self::normalize_presto_format(fmt);
485        normalized == "%Y-%m-%d %T"
486            || normalized == "%Y-%m-%d %H:%i:%S"
487            || fmt == "%Y-%m-%d %H:%i:%S"
488            || fmt == "%Y-%m-%d %T"
489    }
490
491    /// Check if a Presto format string matches the default date format
492    pub fn is_default_date_format(fmt: &str) -> bool {
493        fmt == "%Y-%m-%d" || fmt == "%F"
494    }
495
496    fn transform_function(&self, f: Function) -> Result<Expression> {
497        let name_upper = f.name.to_uppercase();
498        match name_upper.as_str() {
499            // IFNULL -> COALESCE
500            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
501                original_name: None,
502                expressions: f.args,
503                inferred_type: None,
504            }))),
505
506            // NVL -> COALESCE
507            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
508                original_name: None,
509                expressions: f.args,
510                inferred_type: None,
511            }))),
512
513            // ISNULL -> COALESCE
514            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
515                original_name: None,
516                expressions: f.args,
517                inferred_type: None,
518            }))),
519
520            // GETDATE -> CURRENT_TIMESTAMP
521            "GETDATE" => Ok(Expression::CurrentTimestamp(
522                crate::expressions::CurrentTimestamp {
523                    precision: None,
524                    sysdate: false,
525                },
526            )),
527
528            // NOW -> CURRENT_TIMESTAMP
529            "NOW" => Ok(Expression::CurrentTimestamp(
530                crate::expressions::CurrentTimestamp {
531                    precision: None,
532                    sysdate: false,
533                },
534            )),
535
536            // RAND -> RANDOM in Presto (but it's actually RANDOM())
537            "RAND" => Ok(Expression::Function(Box::new(Function::new(
538                "RANDOM".to_string(),
539                vec![],
540            )))),
541
542            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
543            "GROUP_CONCAT" if !f.args.is_empty() => {
544                let mut args = f.args;
545                let first = args.remove(0);
546                let separator = args.pop();
547                let array_agg = Expression::Function(Box::new(Function::new(
548                    "ARRAY_AGG".to_string(),
549                    vec![first],
550                )));
551                let mut join_args = vec![array_agg];
552                if let Some(sep) = separator {
553                    join_args.push(sep);
554                }
555                Ok(Expression::Function(Box::new(Function::new(
556                    "ARRAY_JOIN".to_string(),
557                    join_args,
558                ))))
559            }
560
561            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
562            "STRING_AGG" if !f.args.is_empty() => {
563                let mut args = f.args;
564                let first = args.remove(0);
565                let separator = args.pop();
566                let array_agg = Expression::Function(Box::new(Function::new(
567                    "ARRAY_AGG".to_string(),
568                    vec![first],
569                )));
570                let mut join_args = vec![array_agg];
571                if let Some(sep) = separator {
572                    join_args.push(sep);
573                }
574                Ok(Expression::Function(Box::new(Function::new(
575                    "ARRAY_JOIN".to_string(),
576                    join_args,
577                ))))
578            }
579
580            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
581            "LISTAGG" if !f.args.is_empty() => {
582                let mut args = f.args;
583                let first = args.remove(0);
584                let separator = args.pop();
585                let array_agg = Expression::Function(Box::new(Function::new(
586                    "ARRAY_AGG".to_string(),
587                    vec![first],
588                )));
589                let mut join_args = vec![array_agg];
590                if let Some(sep) = separator {
591                    join_args.push(sep);
592                }
593                Ok(Expression::Function(Box::new(Function::new(
594                    "ARRAY_JOIN".to_string(),
595                    join_args,
596                ))))
597            }
598
599            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
600            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
601
602            // LEN -> LENGTH
603            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
604                f.args.into_iter().next().unwrap(),
605            )))),
606
607            // CHARINDEX -> STRPOS in Presto (with swapped args)
608            "CHARINDEX" if f.args.len() >= 2 => {
609                let mut args = f.args;
610                let substring = args.remove(0);
611                let string = args.remove(0);
612                // STRPOS(string, substring) - note: argument order is reversed
613                Ok(Expression::Function(Box::new(Function::new(
614                    "STRPOS".to_string(),
615                    vec![string, substring],
616                ))))
617            }
618
619            // INSTR -> STRPOS (with same argument order)
620            "INSTR" if f.args.len() >= 2 => {
621                let args = f.args;
622                // INSTR(string, substring) -> STRPOS(string, substring)
623                Ok(Expression::Function(Box::new(Function::new(
624                    "STRPOS".to_string(),
625                    args,
626                ))))
627            }
628
629            // LOCATE -> STRPOS in Presto (with swapped args)
630            "LOCATE" if f.args.len() >= 2 => {
631                let mut args = f.args;
632                let substring = args.remove(0);
633                let string = args.remove(0);
634                // LOCATE(substring, string) -> STRPOS(string, substring)
635                Ok(Expression::Function(Box::new(Function::new(
636                    "STRPOS".to_string(),
637                    vec![string, substring],
638                ))))
639            }
640
641            // ARRAY_LENGTH -> CARDINALITY in Presto
642            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
643                Function::new("CARDINALITY".to_string(), f.args),
644            ))),
645
646            // SIZE -> CARDINALITY in Presto
647            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
648                "CARDINALITY".to_string(),
649                f.args,
650            )))),
651
652            // ARRAY_CONTAINS -> CONTAINS in Presto
653            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
654                Function::new("CONTAINS".to_string(), f.args),
655            ))),
656
657            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
658            "TO_DATE" if !f.args.is_empty() => {
659                if f.args.len() == 1 {
660                    // Simple case: just cast to DATE
661                    Ok(Expression::Cast(Box::new(Cast {
662                        this: f.args.into_iter().next().unwrap(),
663                        to: DataType::Date,
664                        trailing_comments: Vec::new(),
665                        double_colon_syntax: false,
666                        format: None,
667                        default: None,
668                        inferred_type: None,
669                    })))
670                } else {
671                    // With format: use DATE_PARSE
672                    Ok(Expression::Function(Box::new(Function::new(
673                        "DATE_PARSE".to_string(),
674                        f.args,
675                    ))))
676                }
677            }
678
679            // TO_TIMESTAMP -> DATE_PARSE / CAST
680            "TO_TIMESTAMP" if !f.args.is_empty() => {
681                if f.args.len() == 1 {
682                    Ok(Expression::Cast(Box::new(Cast {
683                        this: f.args.into_iter().next().unwrap(),
684                        to: DataType::Timestamp {
685                            precision: None,
686                            timezone: false,
687                        },
688                        trailing_comments: Vec::new(),
689                        double_colon_syntax: false,
690                        format: None,
691                        default: None,
692                        inferred_type: None,
693                    })))
694                } else {
695                    Ok(Expression::Function(Box::new(Function::new(
696                        "DATE_PARSE".to_string(),
697                        f.args,
698                    ))))
699                }
700            }
701
702            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
703            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
704
705            // strftime -> DATE_FORMAT in Presto
706            "STRFTIME" if f.args.len() >= 2 => {
707                let mut args = f.args;
708                // strftime(format, date) -> DATE_FORMAT(date, format)
709                let format = args.remove(0);
710                let date = args.remove(0);
711                Ok(Expression::Function(Box::new(Function::new(
712                    "DATE_FORMAT".to_string(),
713                    vec![date, format],
714                ))))
715            }
716
717            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
718            "TO_CHAR" if f.args.len() >= 2 => {
719                let mut args = f.args;
720                // Convert Oracle-style format string to Presto C-style
721                if let Expression::Literal(ref lit) = args[1] {
722                    if let Literal::String(ref s) = lit.as_ref() {
723                        let converted = Self::oracle_to_presto_format(s);
724                        args[1] = Expression::Literal(Box::new(Literal::String(converted)));
725                    }
726                }
727                Ok(Expression::Function(Box::new(Function::new(
728                    "DATE_FORMAT".to_string(),
729                    args,
730                ))))
731            }
732
733            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
734            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
735                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
736            ))),
737
738            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
739            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
740
741            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
742            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
743
744            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
745            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
746
747            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
748            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
749                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
750            ))),
751
752            // COLLECT_LIST -> ARRAY_AGG
753            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
754                Function::new("ARRAY_AGG".to_string(), f.args),
755            ))),
756
757            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
758            "COLLECT_SET" if !f.args.is_empty() => {
759                let array_agg =
760                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
761                Ok(Expression::Function(Box::new(Function::new(
762                    "ARRAY_DISTINCT".to_string(),
763                    vec![array_agg],
764                ))))
765            }
766
767            // RLIKE -> REGEXP_LIKE in Presto
768            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
769                "REGEXP_LIKE".to_string(),
770                f.args,
771            )))),
772
773            // REGEXP -> REGEXP_LIKE in Presto
774            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
775                "REGEXP_LIKE".to_string(),
776                f.args,
777            )))),
778
779            // PARSE_JSON -> JSON_PARSE in Presto
780            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
781                "JSON_PARSE".to_string(),
782                f.args,
783            )))),
784
785            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
786            "GET_PATH" if f.args.len() == 2 => {
787                let mut args = f.args;
788                let this = args.remove(0);
789                let path = args.remove(0);
790                let json_path = match &path {
791                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
792                        let Literal::String(s) = lit.as_ref() else {
793                            unreachable!()
794                        };
795                        let normalized = if s.starts_with('$') {
796                            s.clone()
797                        } else if s.starts_with('[') {
798                            format!("${}", s)
799                        } else {
800                            format!("$.{}", s)
801                        };
802                        Expression::Literal(Box::new(Literal::String(normalized)))
803                    }
804                    _ => path,
805                };
806                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
807                    this,
808                    path: json_path,
809                    returning: None,
810                    arrow_syntax: false,
811                    hash_arrow_syntax: false,
812                    wrapper_option: None,
813                    quotes_option: None,
814                    on_scalar_string: false,
815                    on_error: None,
816                })))
817            }
818
819            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
820            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
821                let mut args = f.args;
822                let subject = args.remove(0);
823                let pattern = args.remove(0);
824                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
825                if args.len() >= 4 {
826                    let _pos = args.remove(0);
827                    let _occ = args.remove(0);
828                    let _params = args.remove(0);
829                    let group = args.remove(0);
830                    Ok(Expression::Function(Box::new(Function::new(
831                        "REGEXP_EXTRACT".to_string(),
832                        vec![subject, pattern, group],
833                    ))))
834                } else {
835                    Ok(Expression::Function(Box::new(Function::new(
836                        "REGEXP_EXTRACT".to_string(),
837                        vec![subject, pattern],
838                    ))))
839                }
840            }
841
842            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
843            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
844            "DATE_PART" if f.args.len() == 2 => {
845                let part_name = match &f.args[0] {
846                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
847                    Expression::Var(v) => Some(v.this.to_uppercase()),
848                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
849                    _ => None,
850                };
851                match part_name.as_deref() {
852                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
853                        let mut args = f.args;
854                        let value = args.remove(1);
855                        let cast_expr = Expression::Cast(Box::new(Cast {
856                            this: value,
857                            to: DataType::Timestamp {
858                                precision: None,
859                                timezone: false,
860                            },
861                            trailing_comments: Vec::new(),
862                            double_colon_syntax: false,
863                            format: None,
864                            default: None,
865                            inferred_type: None,
866                        }));
867                        Ok(Expression::Function(Box::new(Function::new(
868                            "TO_UNIXTIME".to_string(),
869                            vec![cast_expr],
870                        ))))
871                    }
872                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
873                        let mut args = f.args;
874                        let value = args.remove(1);
875                        let cast_expr = Expression::Cast(Box::new(Cast {
876                            this: value,
877                            to: DataType::Timestamp {
878                                precision: None,
879                                timezone: false,
880                            },
881                            trailing_comments: Vec::new(),
882                            double_colon_syntax: false,
883                            format: None,
884                            default: None,
885                            inferred_type: None,
886                        }));
887                        let unixtime = Expression::Function(Box::new(Function::new(
888                            "TO_UNIXTIME".to_string(),
889                            vec![cast_expr],
890                        )));
891                        Ok(Expression::Mul(Box::new(BinaryOp {
892                            left: unixtime,
893                            right: Expression::Literal(Box::new(Literal::Number(
894                                "1000".to_string(),
895                            ))),
896                            left_comments: Vec::new(),
897                            operator_comments: Vec::new(),
898                            trailing_comments: Vec::new(),
899                            inferred_type: None,
900                        })))
901                    }
902                    _ => Ok(Expression::Function(Box::new(f))),
903                }
904            }
905
906            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
907            "REPLACE" if f.args.len() == 2 => {
908                let mut args = f.args;
909                args.push(Expression::string(""));
910                Ok(Expression::Function(Box::new(Function::new(
911                    "REPLACE".to_string(),
912                    args,
913                ))))
914            }
915
916            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
917            "REGEXP_REPLACE" if f.args.len() == 2 => {
918                let mut args = f.args;
919                args.push(Expression::string(""));
920                Ok(Expression::Function(Box::new(Function::new(
921                    "REGEXP_REPLACE".to_string(),
922                    args,
923                ))))
924            }
925
926            // Pass through everything else
927            _ => Ok(Expression::Function(Box::new(f))),
928        }
929    }
930
931    fn transform_aggregate_function(
932        &self,
933        f: Box<crate::expressions::AggregateFunction>,
934    ) -> Result<Expression> {
935        let name_upper = f.name.to_uppercase();
936        match name_upper.as_str() {
937            // COUNT_IF -> SUM(CASE WHEN...)
938            "COUNT_IF" if !f.args.is_empty() => {
939                let condition = f.args.into_iter().next().unwrap();
940                let case_expr = Expression::Case(Box::new(Case {
941                    operand: None,
942                    whens: vec![(condition, Expression::number(1))],
943                    else_: Some(Expression::number(0)),
944                    comments: Vec::new(),
945                    inferred_type: None,
946                }));
947                Ok(Expression::Sum(Box::new(AggFunc {
948                    ignore_nulls: None,
949                    having_max: None,
950                    this: case_expr,
951                    distinct: f.distinct,
952                    filter: f.filter,
953                    order_by: Vec::new(),
954                    name: None,
955                    limit: None,
956                    inferred_type: None,
957                })))
958            }
959
960            // ANY_VALUE -> ARBITRARY in Presto
961            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
962                "ARBITRARY".to_string(),
963                f.args,
964            )))),
965
966            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
967            "GROUP_CONCAT" if !f.args.is_empty() => {
968                let mut args = f.args;
969                let first = args.remove(0);
970                let separator = args.pop();
971                let array_agg = Expression::Function(Box::new(Function::new(
972                    "ARRAY_AGG".to_string(),
973                    vec![first],
974                )));
975                let mut join_args = vec![array_agg];
976                if let Some(sep) = separator {
977                    join_args.push(sep);
978                }
979                Ok(Expression::Function(Box::new(Function::new(
980                    "ARRAY_JOIN".to_string(),
981                    join_args,
982                ))))
983            }
984
985            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
986            "STRING_AGG" if !f.args.is_empty() => {
987                let mut args = f.args;
988                let first = args.remove(0);
989                let separator = args.pop();
990                let array_agg = Expression::Function(Box::new(Function::new(
991                    "ARRAY_AGG".to_string(),
992                    vec![first],
993                )));
994                let mut join_args = vec![array_agg];
995                if let Some(sep) = separator {
996                    join_args.push(sep);
997                }
998                Ok(Expression::Function(Box::new(Function::new(
999                    "ARRAY_JOIN".to_string(),
1000                    join_args,
1001                ))))
1002            }
1003
1004            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
1005            "LISTAGG" if !f.args.is_empty() => {
1006                let mut args = f.args;
1007                let first = args.remove(0);
1008                let separator = args.pop();
1009                let array_agg = Expression::Function(Box::new(Function::new(
1010                    "ARRAY_AGG".to_string(),
1011                    vec![first],
1012                )));
1013                let mut join_args = vec![array_agg];
1014                if let Some(sep) = separator {
1015                    join_args.push(sep);
1016                }
1017                Ok(Expression::Function(Box::new(Function::new(
1018                    "ARRAY_JOIN".to_string(),
1019                    join_args,
1020                ))))
1021            }
1022
1023            // VAR -> VAR_POP in Presto
1024            "VAR" if !f.args.is_empty() => {
1025                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1026                    name: "VAR_POP".to_string(),
1027                    args: f.args,
1028                    distinct: f.distinct,
1029                    filter: f.filter,
1030                    order_by: Vec::new(),
1031                    limit: None,
1032                    ignore_nulls: None,
1033                    inferred_type: None,
1034                })))
1035            }
1036
1037            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1038            "VARIANCE" if !f.args.is_empty() => {
1039                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1040                    name: "VAR_SAMP".to_string(),
1041                    args: f.args,
1042                    distinct: f.distinct,
1043                    filter: f.filter,
1044                    order_by: Vec::new(),
1045                    limit: None,
1046                    ignore_nulls: None,
1047                    inferred_type: None,
1048                })))
1049            }
1050
1051            // Pass through everything else
1052            _ => Ok(Expression::AggregateFunction(f)),
1053        }
1054    }
1055
1056    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1057        // Presto type mappings are handled in the generator
1058        Ok(Expression::Cast(Box::new(c)))
1059    }
1060}