Skip to main content

polyglot_sql/dialects/
presto.rs

1//! Presto Dialect
2//!
3//! Presto-specific transformations based on sqlglot patterns.
4//! Presto is the base for Trino dialect.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, AggregateFunction, BinaryOp, Case, Cast, Column, DataType, Expression, Function,
10    JsonExtractFunc, LikeOp, Literal, UnaryFunc, VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Presto dialect.
///
/// Stateless unit struct; its `DialectImpl` implementation supplies the
/// Presto tokenizer configuration, generator configuration and expression
/// rewrites.
pub struct PrestoDialect;
17
18impl DialectImpl for PrestoDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Presto
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Presto uses double quotes for identifiers
26        config.identifiers.insert('"', '"');
27        // Presto does NOT support nested comments
28        config.nested_comments = false;
29        // Presto does NOT support QUALIFY - it's a valid identifier
30        // (unlike Snowflake, BigQuery, DuckDB which have QUALIFY clause)
31        config.keywords.remove("QUALIFY");
32        config
33    }
34
35    fn generator_config(&self) -> GeneratorConfig {
36        use crate::generator::IdentifierQuoteStyle;
37        GeneratorConfig {
38            identifier_quote: '"',
39            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
40            dialect: Some(DialectType::Presto),
41            limit_only_literals: true,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Presto
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
51                original_name: None,
52                expressions: vec![f.this, f.expression],
53            }))),
54
55            // NVL -> COALESCE in Presto
56            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
57                original_name: None,
58                expressions: vec![f.this, f.expression],
59            }))),
60
61            // TryCast stays as TryCast (Presto supports TRY_CAST)
62            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
63
64            // SafeCast -> TRY_CAST in Presto
65            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
66
67            // ILike -> LOWER() LIKE LOWER() (Presto doesn't support ILIKE)
68            Expression::ILike(op) => {
69                let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left.clone())));
70                let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right.clone())));
71                Ok(Expression::Like(Box::new(LikeOp {
72                    left: lower_left,
73                    right: lower_right,
74                    escape: op.escape,
75                    quantifier: op.quantifier.clone(),
76                })))
77            }
78
79            // CountIf is native in Presto (keep as-is)
80            Expression::CountIf(f) => Ok(Expression::CountIf(f)),
81
82            // EXPLODE -> UNNEST in Presto
83            Expression::Explode(f) => Ok(Expression::Unnest(Box::new(
84                crate::expressions::UnnestFunc {
85                    this: f.this,
86                    expressions: Vec::new(),
87                    with_ordinality: false,
88                    alias: None,
89                    offset_alias: None,
90                },
91            ))),
92
93            // ExplodeOuter -> UNNEST in Presto
94            Expression::ExplodeOuter(f) => Ok(Expression::Unnest(Box::new(
95                crate::expressions::UnnestFunc {
96                    this: f.this,
97                    expressions: Vec::new(),
98                    with_ordinality: false,
99                    alias: None,
100                    offset_alias: None,
101                },
102            ))),
103
104            // StringAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
105            Expression::StringAgg(f) => {
106                let array_agg = Expression::Function(Box::new(Function::new(
107                    "ARRAY_AGG".to_string(),
108                    vec![f.this.clone()],
109                )));
110                let mut join_args = vec![array_agg];
111                if let Some(sep) = f.separator {
112                    join_args.push(sep);
113                }
114                Ok(Expression::Function(Box::new(Function::new(
115                    "ARRAY_JOIN".to_string(),
116                    join_args,
117                ))))
118            }
119
120            // GroupConcat -> ARRAY_JOIN(ARRAY_AGG()) in Presto
121            Expression::GroupConcat(f) => {
122                let array_agg = Expression::Function(Box::new(Function::new(
123                    "ARRAY_AGG".to_string(),
124                    vec![f.this.clone()],
125                )));
126                let mut join_args = vec![array_agg];
127                if let Some(sep) = f.separator {
128                    join_args.push(sep);
129                }
130                Ok(Expression::Function(Box::new(Function::new(
131                    "ARRAY_JOIN".to_string(),
132                    join_args,
133                ))))
134            }
135
136            // ListAgg -> ARRAY_JOIN(ARRAY_AGG()) in Presto
137            Expression::ListAgg(f) => {
138                let array_agg = Expression::Function(Box::new(Function::new(
139                    "ARRAY_AGG".to_string(),
140                    vec![f.this.clone()],
141                )));
142                let mut join_args = vec![array_agg];
143                if let Some(sep) = f.separator {
144                    join_args.push(sep);
145                }
146                Ok(Expression::Function(Box::new(Function::new(
147                    "ARRAY_JOIN".to_string(),
148                    join_args,
149                ))))
150            }
151
152            // ParseJson: handled by generator (outputs JSON_PARSE for Presto)
153
154            // JSONExtract (variant_extract/colon accessor) -> JSON_EXTRACT in Presto
155            Expression::JSONExtract(e) if e.variant_extract.is_some() => {
156                let path = match *e.expression {
157                    Expression::Literal(Literal::String(s)) => {
158                        let normalized = if s.starts_with('$') {
159                            s
160                        } else if s.starts_with('[') {
161                            format!("${}", s)
162                        } else {
163                            format!("$.{}", s)
164                        };
165                        Expression::Literal(Literal::String(normalized))
166                    }
167                    other => other,
168                };
169                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
170                    this: *e.this,
171                    path,
172                    returning: None,
173                    arrow_syntax: false,
174                    hash_arrow_syntax: false,
175                    wrapper_option: None,
176                    quotes_option: None,
177                    on_scalar_string: false,
178                    on_error: None,
179                })))
180            }
181
182            // Generic function transformations
183            Expression::Function(f) => self.transform_function(*f),
184
185            // Generic aggregate function transformations
186            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
187
188            // Cast transformations
189            Expression::Cast(c) => self.transform_cast(*c),
190
191            // Div: Presto has TYPED_DIVISION - wrap left operand in CAST(AS DOUBLE)
192            // to ensure float division (only when left isn't already a float cast)
193            Expression::Div(mut op) => {
194                if !Self::is_float_cast(&op.left) {
195                    op.left = Expression::Cast(Box::new(crate::expressions::Cast {
196                        this: op.left,
197                        to: DataType::Double {
198                            precision: None,
199                            scale: None,
200                        },
201                        trailing_comments: Vec::new(),
202                        double_colon_syntax: false,
203                        format: None,
204                        default: None,
205                    }));
206                }
207                Ok(Expression::Div(op))
208            }
209
210            // IntDiv -> CAST(CAST(x AS DOUBLE) / y AS INTEGER) in Presto
211            Expression::IntDiv(f) => {
212                let cast_x = Expression::Cast(Box::new(Cast {
213                    this: f.this,
214                    to: crate::expressions::DataType::Double {
215                        precision: None,
216                        scale: None,
217                    },
218                    trailing_comments: Vec::new(),
219                    double_colon_syntax: false,
220                    format: None,
221                    default: None,
222                }));
223                let div_expr = Expression::Div(Box::new(BinaryOp::new(cast_x, f.expression)));
224                Ok(Expression::Cast(Box::new(Cast {
225                    this: div_expr,
226                    to: crate::expressions::DataType::Int {
227                        length: None,
228                        integer_spelling: true,
229                    },
230                    trailing_comments: Vec::new(),
231                    double_colon_syntax: false,
232                    format: None,
233                    default: None,
234                })))
235            }
236
237            // DELETE: Strip table alias and unqualify columns (Presto doesn't support DELETE aliases)
238            Expression::Delete(mut d) => {
239                if d.alias.is_some() {
240                    d.alias = None;
241                    d.alias_explicit_as = false;
242                    // Unqualify all columns in the WHERE clause
243                    if let Some(ref mut where_clause) = d.where_clause {
244                        where_clause.this = Self::unqualify_columns(where_clause.this.clone());
245                    }
246                }
247                Ok(Expression::Delete(d))
248            }
249
250            // Pass through everything else
251            _ => Ok(expr),
252        }
253    }
254}
255
256impl PrestoDialect {
    /// Recursively unqualify columns - remove table qualifiers from Column references.
    ///
    /// Takes ownership of `expr` and returns the rewritten tree. Only the
    /// expression variants matched below are descended into; any other variant
    /// is returned unchanged, so qualified columns nested inside an unlisted
    /// variant keep their qualifier.
    fn unqualify_columns(expr: Expression) -> Expression {
        match expr {
            Expression::Column(c) => {
                if c.table.is_some() {
                    // Rebuild the column without its table, keeping the other fields.
                    Expression::Column(Column {
                        name: c.name,
                        table: None,
                        join_mark: c.join_mark,
                        trailing_comments: c.trailing_comments,
                    })
                } else {
                    Expression::Column(c)
                }
            }
            // DotAccess: db.t2.c -> c (strip all qualifiers, keep only the final field name).
            // The result is a fresh Column: no join mark and no trailing comments.
            Expression::Dot(d) => Expression::Column(Column {
                name: d.field,
                table: None,
                join_mark: false,
                trailing_comments: Vec::new(),
            }),
            // Recursively walk common binary expression types (both operands)
            Expression::And(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::And(op)
            }
            Expression::Or(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Or(op)
            }
            Expression::Eq(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Eq(op)
            }
            Expression::Neq(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Neq(op)
            }
            Expression::Gt(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Gt(op)
            }
            Expression::Lt(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Lt(op)
            }
            Expression::Gte(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Gte(op)
            }
            Expression::Lte(mut op) => {
                op.left = Self::unqualify_columns(op.left);
                op.right = Self::unqualify_columns(op.right);
                Expression::Lte(op)
            }
            // Unary operators
            Expression::Not(mut e) => {
                e.this = Self::unqualify_columns(e.this);
                Expression::Not(e)
            }
            // Predicates
            Expression::In(mut i) => {
                // Tested operand and every listed value are rewritten.
                i.this = Self::unqualify_columns(i.this);
                i.expressions = i
                    .expressions
                    .into_iter()
                    .map(Self::unqualify_columns)
                    .collect();
                // Also recurse into subquery if present
                if let Some(q) = i.query {
                    i.query = Some(Self::unqualify_columns(q));
                }
                Expression::In(i)
            }
            Expression::IsNull(mut f) => {
                f.this = Self::unqualify_columns(f.this);
                Expression::IsNull(f)
            }
            Expression::Paren(mut p) => {
                p.this = Self::unqualify_columns(p.this);
                Expression::Paren(p)
            }
            // Generic function calls: every argument is rewritten.
            Expression::Function(mut f) => {
                f.args = f.args.into_iter().map(Self::unqualify_columns).collect();
                Expression::Function(f)
            }
            // For subqueries (SELECT statements inside IN, etc), also unqualify.
            // Only the projection list and the WHERE condition are visited here.
            Expression::Select(mut s) => {
                s.expressions = s
                    .expressions
                    .into_iter()
                    .map(Self::unqualify_columns)
                    .collect();
                // The condition is cloned because it is only reachable through
                // a mutable borrow while this function takes ownership.
                if let Some(ref mut w) = s.where_clause {
                    w.this = Self::unqualify_columns(w.this.clone());
                }
                Expression::Select(s)
            }
            Expression::Subquery(mut sq) => {
                sq.this = Self::unqualify_columns(sq.this);
                Expression::Subquery(sq)
            }
            Expression::Alias(mut a) => {
                a.this = Self::unqualify_columns(a.this);
                Expression::Alias(a)
            }
            // Pass through other expressions unchanged
            other => other,
        }
    }
375
376    /// Check if an expression is already a CAST to a float type
377    fn is_float_cast(expr: &Expression) -> bool {
378        if let Expression::Cast(cast) = expr {
379            matches!(&cast.to, DataType::Double { .. } | DataType::Float { .. })
380        } else {
381            false
382        }
383    }
384
385    /// Convert Oracle/PostgreSQL-style date format to Presto's C-style format
386    /// Oracle: dd, hh, hh24, mi, mm, ss, yyyy, yy
387    /// Presto: %d, %H, %H, %i, %m, %s, %Y, %y
388    pub fn oracle_to_presto_format(fmt: &str) -> String {
389        // Process character by character to avoid double-replacement issues
390        let chars: Vec<char> = fmt.chars().collect();
391        let mut result = String::new();
392        let mut i = 0;
393        while i < chars.len() {
394            let remaining = &fmt[i..];
395            if remaining.starts_with("yyyy") {
396                result.push_str("%Y");
397                i += 4;
398            } else if remaining.starts_with("yy") {
399                result.push_str("%y");
400                i += 2;
401            } else if remaining.starts_with("hh24") {
402                result.push_str("%H");
403                i += 4;
404            } else if remaining.starts_with("hh") {
405                result.push_str("%H");
406                i += 2;
407            } else if remaining.starts_with("mi") {
408                result.push_str("%i");
409                i += 2;
410            } else if remaining.starts_with("mm") {
411                result.push_str("%m");
412                i += 2;
413            } else if remaining.starts_with("dd") {
414                result.push_str("%d");
415                i += 2;
416            } else if remaining.starts_with("ss") {
417                result.push_str("%s");
418                i += 2;
419            } else {
420                result.push(chars[i]);
421                i += 1;
422            }
423        }
424        result
425    }
426
427    /// Convert Presto's C-style date format to Java-style format (for Hive/Spark)
428    /// Presto: %Y, %m, %d, %H, %i, %S, %s, %y, %T, %F
429    /// Java:   yyyy, MM, dd, HH, mm, ss, ss, yy, HH:mm:ss, yyyy-MM-dd
430    pub fn presto_to_java_format(fmt: &str) -> String {
431        fmt.replace("%Y", "yyyy")
432            .replace("%m", "MM")
433            .replace("%d", "dd")
434            .replace("%H", "HH")
435            .replace("%i", "mm")
436            .replace("%S", "ss")
437            .replace("%s", "ss")
438            .replace("%y", "yy")
439            .replace("%T", "HH:mm:ss")
440            .replace("%F", "yyyy-MM-dd")
441            .replace("%M", "MMMM")
442    }
443
444    /// Normalize Presto format strings (e.g., %H:%i:%S -> %T, %Y-%m-%d -> %F)
445    pub fn normalize_presto_format(fmt: &str) -> String {
446        fmt.replace("%H:%i:%S", "%T").replace("%H:%i:%s", "%T")
447    }
448
449    /// Convert Presto's C-style format to DuckDB C-style (only difference: %i -> %M for minutes)
450    pub fn presto_to_duckdb_format(fmt: &str) -> String {
451        fmt.replace("%i", "%M")
452            .replace("%s", "%S")
453            .replace("%T", "%H:%M:%S")
454    }
455
456    /// Convert Presto's C-style format to BigQuery format
457    pub fn presto_to_bigquery_format(fmt: &str) -> String {
458        // BigQuery uses %F for %Y-%m-%d, %T for %H:%M:%S
459        // BigQuery uses %M for minutes (like DuckDB), not %i
460        let result = fmt
461            .replace("%Y-%m-%d", "%F")
462            .replace("%H:%i:%S", "%T")
463            .replace("%H:%i:%s", "%T")
464            .replace("%i", "%M")
465            .replace("%s", "%S");
466        result
467    }
468
469    /// Check if a Presto format string matches the default timestamp format
470    pub fn is_default_timestamp_format(fmt: &str) -> bool {
471        let normalized = Self::normalize_presto_format(fmt);
472        normalized == "%Y-%m-%d %T"
473            || normalized == "%Y-%m-%d %H:%i:%S"
474            || fmt == "%Y-%m-%d %H:%i:%S"
475            || fmt == "%Y-%m-%d %T"
476    }
477
478    /// Check if a Presto format string matches the default date format
479    pub fn is_default_date_format(fmt: &str) -> bool {
480        fmt == "%Y-%m-%d" || fmt == "%F"
481    }
482
483    fn transform_function(&self, f: Function) -> Result<Expression> {
484        let name_upper = f.name.to_uppercase();
485        match name_upper.as_str() {
486            // IFNULL -> COALESCE
487            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
488                original_name: None,
489                expressions: f.args,
490            }))),
491
492            // NVL -> COALESCE
493            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
494                original_name: None,
495                expressions: f.args,
496            }))),
497
498            // ISNULL -> COALESCE
499            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
500                original_name: None,
501                expressions: f.args,
502            }))),
503
504            // GETDATE -> CURRENT_TIMESTAMP
505            "GETDATE" => Ok(Expression::CurrentTimestamp(
506                crate::expressions::CurrentTimestamp {
507                    precision: None,
508                    sysdate: false,
509                },
510            )),
511
512            // NOW -> CURRENT_TIMESTAMP
513            "NOW" => Ok(Expression::CurrentTimestamp(
514                crate::expressions::CurrentTimestamp {
515                    precision: None,
516                    sysdate: false,
517                },
518            )),
519
520            // RAND -> RANDOM in Presto (but it's actually RANDOM())
521            "RAND" => Ok(Expression::Function(Box::new(Function::new(
522                "RANDOM".to_string(),
523                vec![],
524            )))),
525
526            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
527            "GROUP_CONCAT" if !f.args.is_empty() => {
528                let mut args = f.args;
529                let first = args.remove(0);
530                let separator = args.pop();
531                let array_agg = Expression::Function(Box::new(Function::new(
532                    "ARRAY_AGG".to_string(),
533                    vec![first],
534                )));
535                let mut join_args = vec![array_agg];
536                if let Some(sep) = separator {
537                    join_args.push(sep);
538                }
539                Ok(Expression::Function(Box::new(Function::new(
540                    "ARRAY_JOIN".to_string(),
541                    join_args,
542                ))))
543            }
544
545            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
546            "STRING_AGG" if !f.args.is_empty() => {
547                let mut args = f.args;
548                let first = args.remove(0);
549                let separator = args.pop();
550                let array_agg = Expression::Function(Box::new(Function::new(
551                    "ARRAY_AGG".to_string(),
552                    vec![first],
553                )));
554                let mut join_args = vec![array_agg];
555                if let Some(sep) = separator {
556                    join_args.push(sep);
557                }
558                Ok(Expression::Function(Box::new(Function::new(
559                    "ARRAY_JOIN".to_string(),
560                    join_args,
561                ))))
562            }
563
564            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
565            "LISTAGG" if !f.args.is_empty() => {
566                let mut args = f.args;
567                let first = args.remove(0);
568                let separator = args.pop();
569                let array_agg = Expression::Function(Box::new(Function::new(
570                    "ARRAY_AGG".to_string(),
571                    vec![first],
572                )));
573                let mut join_args = vec![array_agg];
574                if let Some(sep) = separator {
575                    join_args.push(sep);
576                }
577                Ok(Expression::Function(Box::new(Function::new(
578                    "ARRAY_JOIN".to_string(),
579                    join_args,
580                ))))
581            }
582
583            // SUBSTR is native in Presto (keep as-is, don't convert to SUBSTRING)
584            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
585
586            // LEN -> LENGTH
587            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
588                f.args.into_iter().next().unwrap(),
589            )))),
590
591            // CHARINDEX -> STRPOS in Presto (with swapped args)
592            "CHARINDEX" if f.args.len() >= 2 => {
593                let mut args = f.args;
594                let substring = args.remove(0);
595                let string = args.remove(0);
596                // STRPOS(string, substring) - note: argument order is reversed
597                Ok(Expression::Function(Box::new(Function::new(
598                    "STRPOS".to_string(),
599                    vec![string, substring],
600                ))))
601            }
602
603            // INSTR -> STRPOS (with same argument order)
604            "INSTR" if f.args.len() >= 2 => {
605                let args = f.args;
606                // INSTR(string, substring) -> STRPOS(string, substring)
607                Ok(Expression::Function(Box::new(Function::new(
608                    "STRPOS".to_string(),
609                    args,
610                ))))
611            }
612
613            // LOCATE -> STRPOS in Presto (with swapped args)
614            "LOCATE" if f.args.len() >= 2 => {
615                let mut args = f.args;
616                let substring = args.remove(0);
617                let string = args.remove(0);
618                // LOCATE(substring, string) -> STRPOS(string, substring)
619                Ok(Expression::Function(Box::new(Function::new(
620                    "STRPOS".to_string(),
621                    vec![string, substring],
622                ))))
623            }
624
625            // ARRAY_LENGTH -> CARDINALITY in Presto
626            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
627                Function::new("CARDINALITY".to_string(), f.args),
628            ))),
629
630            // SIZE -> CARDINALITY in Presto
631            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
632                "CARDINALITY".to_string(),
633                f.args,
634            )))),
635
636            // ARRAY_CONTAINS -> CONTAINS in Presto
637            "ARRAY_CONTAINS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
638                Function::new("CONTAINS".to_string(), f.args),
639            ))),
640
641            // TO_DATE -> DATE_PARSE in Presto (or CAST to DATE)
642            "TO_DATE" if !f.args.is_empty() => {
643                if f.args.len() == 1 {
644                    // Simple case: just cast to DATE
645                    Ok(Expression::Cast(Box::new(Cast {
646                        this: f.args.into_iter().next().unwrap(),
647                        to: DataType::Date,
648                        trailing_comments: Vec::new(),
649                        double_colon_syntax: false,
650                        format: None,
651                        default: None,
652                    })))
653                } else {
654                    // With format: use DATE_PARSE
655                    Ok(Expression::Function(Box::new(Function::new(
656                        "DATE_PARSE".to_string(),
657                        f.args,
658                    ))))
659                }
660            }
661
662            // TO_TIMESTAMP -> DATE_PARSE / CAST
663            "TO_TIMESTAMP" if !f.args.is_empty() => {
664                if f.args.len() == 1 {
665                    Ok(Expression::Cast(Box::new(Cast {
666                        this: f.args.into_iter().next().unwrap(),
667                        to: DataType::Timestamp {
668                            precision: None,
669                            timezone: false,
670                        },
671                        trailing_comments: Vec::new(),
672                        double_colon_syntax: false,
673                        format: None,
674                        default: None,
675                    })))
676                } else {
677                    Ok(Expression::Function(Box::new(Function::new(
678                        "DATE_PARSE".to_string(),
679                        f.args,
680                    ))))
681                }
682            }
683
684            // DATE_FORMAT -> DATE_FORMAT (native in Presto)
685            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
686
687            // strftime -> DATE_FORMAT in Presto
688            "STRFTIME" if f.args.len() >= 2 => {
689                let mut args = f.args;
690                // strftime(format, date) -> DATE_FORMAT(date, format)
691                let format = args.remove(0);
692                let date = args.remove(0);
693                Ok(Expression::Function(Box::new(Function::new(
694                    "DATE_FORMAT".to_string(),
695                    vec![date, format],
696                ))))
697            }
698
699            // TO_CHAR -> DATE_FORMAT in Presto (convert Oracle-style format to Presto C-style)
700            "TO_CHAR" if f.args.len() >= 2 => {
701                let mut args = f.args;
702                // Convert Oracle-style format string to Presto C-style
703                if let Expression::Literal(Literal::String(ref s)) = args[1] {
704                    let converted = Self::oracle_to_presto_format(s);
705                    args[1] = Expression::Literal(Literal::String(converted));
706                }
707                Ok(Expression::Function(Box::new(Function::new(
708                    "DATE_FORMAT".to_string(),
709                    args,
710                ))))
711            }
712
713            // LEVENSHTEIN -> LEVENSHTEIN_DISTANCE in Presto
714            "LEVENSHTEIN" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
715                Function::new("LEVENSHTEIN_DISTANCE".to_string(), f.args),
716            ))),
717
718            // FLATTEN -> FLATTEN is supported in Presto for nested arrays
719            "FLATTEN" => Ok(Expression::Function(Box::new(f))),
720
721            // JSON_EXTRACT -> JSON_EXTRACT (native in Presto)
722            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
723
724            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_SCALAR (native in Presto)
725            "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(f))),
726
727            // GET_JSON_OBJECT -> JSON_EXTRACT_SCALAR in Presto
728            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
729                Function::new("JSON_EXTRACT_SCALAR".to_string(), f.args),
730            ))),
731
732            // COLLECT_LIST -> ARRAY_AGG
733            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
734                Function::new("ARRAY_AGG".to_string(), f.args),
735            ))),
736
737            // COLLECT_SET -> ARRAY_DISTINCT(ARRAY_AGG())
738            "COLLECT_SET" if !f.args.is_empty() => {
739                let array_agg =
740                    Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args)));
741                Ok(Expression::Function(Box::new(Function::new(
742                    "ARRAY_DISTINCT".to_string(),
743                    vec![array_agg],
744                ))))
745            }
746
747            // RLIKE -> REGEXP_LIKE in Presto
748            "RLIKE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
749                "REGEXP_LIKE".to_string(),
750                f.args,
751            )))),
752
753            // REGEXP -> REGEXP_LIKE in Presto
754            "REGEXP" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
755                "REGEXP_LIKE".to_string(),
756                f.args,
757            )))),
758
759            // PARSE_JSON -> JSON_PARSE in Presto
760            "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
761                "JSON_PARSE".to_string(),
762                f.args,
763            )))),
764
765            // GET_PATH(obj, path) -> JSON_EXTRACT(obj, json_path) in Presto
766            "GET_PATH" if f.args.len() == 2 => {
767                let mut args = f.args;
768                let this = args.remove(0);
769                let path = args.remove(0);
770                let json_path = match &path {
771                    Expression::Literal(Literal::String(s)) => {
772                        let normalized = if s.starts_with('$') {
773                            s.clone()
774                        } else if s.starts_with('[') {
775                            format!("${}", s)
776                        } else {
777                            format!("$.{}", s)
778                        };
779                        Expression::Literal(Literal::String(normalized))
780                    }
781                    _ => path,
782                };
783                Ok(Expression::JsonExtract(Box::new(JsonExtractFunc {
784                    this,
785                    path: json_path,
786                    returning: None,
787                    arrow_syntax: false,
788                    hash_arrow_syntax: false,
789                    wrapper_option: None,
790                    quotes_option: None,
791                    on_scalar_string: false,
792                    on_error: None,
793                })))
794            }
795
796            // REGEXP_SUBSTR(subject, pattern, ...) -> REGEXP_EXTRACT(subject, pattern[, group])
797            "REGEXP_SUBSTR" if f.args.len() >= 2 => {
798                let mut args = f.args;
799                let subject = args.remove(0);
800                let pattern = args.remove(0);
801                // If 6-arg form: (subject, pattern, pos, occ, params, group) -> keep group
802                if args.len() >= 4 {
803                    let _pos = args.remove(0);
804                    let _occ = args.remove(0);
805                    let _params = args.remove(0);
806                    let group = args.remove(0);
807                    Ok(Expression::Function(Box::new(Function::new(
808                        "REGEXP_EXTRACT".to_string(),
809                        vec![subject, pattern, group],
810                    ))))
811                } else {
812                    Ok(Expression::Function(Box::new(Function::new(
813                        "REGEXP_EXTRACT".to_string(),
814                        vec![subject, pattern],
815                    ))))
816                }
817            }
818
819            // DATE_PART(epoch_second, x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP))
820            // DATE_PART(epoch_millisecond[s], x) -> TO_UNIXTIME(CAST(x AS TIMESTAMP)) * 1000
821            "DATE_PART" if f.args.len() == 2 => {
822                let part_name = match &f.args[0] {
823                    Expression::Identifier(id) => Some(id.name.to_uppercase()),
824                    Expression::Column(c) => Some(c.name.name.to_uppercase()),
825                    _ => None,
826                };
827                match part_name.as_deref() {
828                    Some("EPOCH_SECOND" | "EPOCH_SECONDS") => {
829                        let mut args = f.args;
830                        let value = args.remove(1);
831                        let cast_expr = Expression::Cast(Box::new(Cast {
832                            this: value,
833                            to: DataType::Timestamp {
834                                precision: None,
835                                timezone: false,
836                            },
837                            trailing_comments: Vec::new(),
838                            double_colon_syntax: false,
839                            format: None,
840                            default: None,
841                        }));
842                        Ok(Expression::Function(Box::new(Function::new(
843                            "TO_UNIXTIME".to_string(),
844                            vec![cast_expr],
845                        ))))
846                    }
847                    Some("EPOCH_MILLISECOND" | "EPOCH_MILLISECONDS") => {
848                        let mut args = f.args;
849                        let value = args.remove(1);
850                        let cast_expr = Expression::Cast(Box::new(Cast {
851                            this: value,
852                            to: DataType::Timestamp {
853                                precision: None,
854                                timezone: false,
855                            },
856                            trailing_comments: Vec::new(),
857                            double_colon_syntax: false,
858                            format: None,
859                            default: None,
860                        }));
861                        let unixtime = Expression::Function(Box::new(Function::new(
862                            "TO_UNIXTIME".to_string(),
863                            vec![cast_expr],
864                        )));
865                        Ok(Expression::Mul(Box::new(BinaryOp {
866                            left: unixtime,
867                            right: Expression::Literal(Literal::Number("1000".to_string())),
868                            left_comments: Vec::new(),
869                            operator_comments: Vec::new(),
870                            trailing_comments: Vec::new(),
871                        })))
872                    }
873                    _ => Ok(Expression::Function(Box::new(f))),
874                }
875            }
876
877            // REPLACE(x, y) with 2 args -> REPLACE(x, y, '') - Presto requires explicit empty string
878            "REPLACE" if f.args.len() == 2 => {
879                let mut args = f.args;
880                args.push(Expression::string(""));
881                Ok(Expression::Function(Box::new(Function::new(
882                    "REPLACE".to_string(),
883                    args,
884                ))))
885            }
886
887            // REGEXP_REPLACE(x, y) with 2 args -> REGEXP_REPLACE(x, y, '')
888            "REGEXP_REPLACE" if f.args.len() == 2 => {
889                let mut args = f.args;
890                args.push(Expression::string(""));
891                Ok(Expression::Function(Box::new(Function::new(
892                    "REGEXP_REPLACE".to_string(),
893                    args,
894                ))))
895            }
896
897            // Pass through everything else
898            _ => Ok(Expression::Function(Box::new(f))),
899        }
900    }
901
902    fn transform_aggregate_function(
903        &self,
904        f: Box<crate::expressions::AggregateFunction>,
905    ) -> Result<Expression> {
906        let name_upper = f.name.to_uppercase();
907        match name_upper.as_str() {
908            // COUNT_IF -> SUM(CASE WHEN...)
909            "COUNT_IF" if !f.args.is_empty() => {
910                let condition = f.args.into_iter().next().unwrap();
911                let case_expr = Expression::Case(Box::new(Case {
912                    operand: None,
913                    whens: vec![(condition, Expression::number(1))],
914                    else_: Some(Expression::number(0)),
915                    comments: Vec::new(),
916                }));
917                Ok(Expression::Sum(Box::new(AggFunc {
918                    ignore_nulls: None,
919                    having_max: None,
920                    this: case_expr,
921                    distinct: f.distinct,
922                    filter: f.filter,
923                    order_by: Vec::new(),
924                    name: None,
925                    limit: None,
926                })))
927            }
928
929            // ANY_VALUE -> ARBITRARY in Presto
930            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
931                "ARBITRARY".to_string(),
932                f.args,
933            )))),
934
935            // GROUP_CONCAT -> ARRAY_JOIN(ARRAY_AGG())
936            "GROUP_CONCAT" if !f.args.is_empty() => {
937                let mut args = f.args;
938                let first = args.remove(0);
939                let separator = args.pop();
940                let array_agg = Expression::Function(Box::new(Function::new(
941                    "ARRAY_AGG".to_string(),
942                    vec![first],
943                )));
944                let mut join_args = vec![array_agg];
945                if let Some(sep) = separator {
946                    join_args.push(sep);
947                }
948                Ok(Expression::Function(Box::new(Function::new(
949                    "ARRAY_JOIN".to_string(),
950                    join_args,
951                ))))
952            }
953
954            // STRING_AGG -> ARRAY_JOIN(ARRAY_AGG())
955            "STRING_AGG" if !f.args.is_empty() => {
956                let mut args = f.args;
957                let first = args.remove(0);
958                let separator = args.pop();
959                let array_agg = Expression::Function(Box::new(Function::new(
960                    "ARRAY_AGG".to_string(),
961                    vec![first],
962                )));
963                let mut join_args = vec![array_agg];
964                if let Some(sep) = separator {
965                    join_args.push(sep);
966                }
967                Ok(Expression::Function(Box::new(Function::new(
968                    "ARRAY_JOIN".to_string(),
969                    join_args,
970                ))))
971            }
972
973            // LISTAGG -> ARRAY_JOIN(ARRAY_AGG())
974            "LISTAGG" if !f.args.is_empty() => {
975                let mut args = f.args;
976                let first = args.remove(0);
977                let separator = args.pop();
978                let array_agg = Expression::Function(Box::new(Function::new(
979                    "ARRAY_AGG".to_string(),
980                    vec![first],
981                )));
982                let mut join_args = vec![array_agg];
983                if let Some(sep) = separator {
984                    join_args.push(sep);
985                }
986                Ok(Expression::Function(Box::new(Function::new(
987                    "ARRAY_JOIN".to_string(),
988                    join_args,
989                ))))
990            }
991
992            // VAR -> VAR_POP in Presto
993            "VAR" if !f.args.is_empty() => {
994                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
995                    name: "VAR_POP".to_string(),
996                    args: f.args,
997                    distinct: f.distinct,
998                    filter: f.filter,
999                    order_by: Vec::new(),
1000                    limit: None,
1001                    ignore_nulls: None,
1002                })))
1003            }
1004
1005            // VARIANCE -> VAR_SAMP in Presto (for sample variance)
1006            "VARIANCE" if !f.args.is_empty() => {
1007                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
1008                    name: "VAR_SAMP".to_string(),
1009                    args: f.args,
1010                    distinct: f.distinct,
1011                    filter: f.filter,
1012                    order_by: Vec::new(),
1013                    limit: None,
1014                    ignore_nulls: None,
1015                })))
1016            }
1017
1018            // Pass through everything else
1019            _ => Ok(Expression::AggregateFunction(f)),
1020        }
1021    }
1022
1023    fn transform_cast(&self, c: Cast) -> Result<Expression> {
1024        // Presto type mappings are handled in the generator
1025        Ok(Expression::Cast(Box::new(c)))
1026    }
1027}