// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//! identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic;
23mod postgres;
24mod mysql;
25mod bigquery;
26mod snowflake;
27mod duckdb;
28mod tsql;
29mod oracle;
30mod hive;
31mod spark;
32mod sqlite;
33mod presto;
34mod trino;
35mod redshift;
36mod clickhouse;
37mod databricks;
38mod athena;
39mod teradata;
40mod doris;
41mod starrocks;
42mod materialize;
43mod risingwave;
44mod singlestore;
45mod cockroachdb;
46mod tidb;
47mod druid;
48mod solr;
49mod tableau;
50mod dune;
51mod fabric;
52mod drill;
53mod dremio;
54mod exasol;
55
56pub use generic::GenericDialect;
57pub use postgres::PostgresDialect;
58pub use mysql::MySQLDialect;
59pub use bigquery::BigQueryDialect;
60pub use snowflake::SnowflakeDialect;
61pub use duckdb::DuckDBDialect;
62pub use tsql::TSQLDialect;
63pub use oracle::OracleDialect;
64pub use hive::HiveDialect;
65pub use spark::SparkDialect;
66pub use sqlite::SQLiteDialect;
67pub use presto::PrestoDialect;
68pub use trino::TrinoDialect;
69pub use redshift::RedshiftDialect;
70pub use clickhouse::ClickHouseDialect;
71pub use databricks::DatabricksDialect;
72pub use athena::AthenaDialect;
73pub use teradata::TeradataDialect;
74pub use doris::DorisDialect;
75pub use starrocks::StarRocksDialect;
76pub use materialize::MaterializeDialect;
77pub use risingwave::RisingWaveDialect;
78pub use singlestore::SingleStoreDialect;
79pub use cockroachdb::CockroachDBDialect;
80pub use tidb::TiDBDialect;
81pub use druid::DruidDialect;
82pub use solr::SolrDialect;
83pub use tableau::TableauDialect;
84pub use dune::DuneDialect;
85pub use fabric::FabricDialect;
86pub use drill::DrillDialect;
87pub use dremio::DremioDialect;
88pub use exasol::ExasolDialect;
89
90use crate::error::Result;
91use crate::expressions::{Expression, FunctionBody};
92use crate::generator::{Generator, GeneratorConfig};
93use crate::parser::Parser;
94use crate::tokens::{Tokenizer, TokenizerConfig};
95use serde::{Deserialize, Serialize};
96use std::collections::HashMap;
97use std::sync::{Arc, LazyLock, RwLock};
98
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to
/// [`TSQL`](DialectType::TSQL)); see the `FromStr` implementation for the full alias table.
/// The serde representation uses the same lowercase names as [`Display`](std::fmt::Display).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
}
177
178impl Default for DialectType {
179 fn default() -> Self {
180 DialectType::Generic
181 }
182}
183
184impl std::fmt::Display for DialectType {
185 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
186 match self {
187 DialectType::Generic => write!(f, "generic"),
188 DialectType::PostgreSQL => write!(f, "postgresql"),
189 DialectType::MySQL => write!(f, "mysql"),
190 DialectType::BigQuery => write!(f, "bigquery"),
191 DialectType::Snowflake => write!(f, "snowflake"),
192 DialectType::DuckDB => write!(f, "duckdb"),
193 DialectType::SQLite => write!(f, "sqlite"),
194 DialectType::Hive => write!(f, "hive"),
195 DialectType::Spark => write!(f, "spark"),
196 DialectType::Trino => write!(f, "trino"),
197 DialectType::Presto => write!(f, "presto"),
198 DialectType::Redshift => write!(f, "redshift"),
199 DialectType::TSQL => write!(f, "tsql"),
200 DialectType::Oracle => write!(f, "oracle"),
201 DialectType::ClickHouse => write!(f, "clickhouse"),
202 DialectType::Databricks => write!(f, "databricks"),
203 DialectType::Athena => write!(f, "athena"),
204 DialectType::Teradata => write!(f, "teradata"),
205 DialectType::Doris => write!(f, "doris"),
206 DialectType::StarRocks => write!(f, "starrocks"),
207 DialectType::Materialize => write!(f, "materialize"),
208 DialectType::RisingWave => write!(f, "risingwave"),
209 DialectType::SingleStore => write!(f, "singlestore"),
210 DialectType::CockroachDB => write!(f, "cockroachdb"),
211 DialectType::TiDB => write!(f, "tidb"),
212 DialectType::Druid => write!(f, "druid"),
213 DialectType::Solr => write!(f, "solr"),
214 DialectType::Tableau => write!(f, "tableau"),
215 DialectType::Dune => write!(f, "dune"),
216 DialectType::Fabric => write!(f, "fabric"),
217 DialectType::Drill => write!(f, "drill"),
218 DialectType::Dremio => write!(f, "dremio"),
219 DialectType::Exasol => write!(f, "exasol"),
220 }
221 }
222}
223
224impl std::str::FromStr for DialectType {
225 type Err = crate::error::Error;
226
227 fn from_str(s: &str) -> Result<Self> {
228 match s.to_lowercase().as_str() {
229 "generic" | "" => Ok(DialectType::Generic),
230 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
231 "mysql" => Ok(DialectType::MySQL),
232 "bigquery" => Ok(DialectType::BigQuery),
233 "snowflake" => Ok(DialectType::Snowflake),
234 "duckdb" => Ok(DialectType::DuckDB),
235 "sqlite" => Ok(DialectType::SQLite),
236 "hive" => Ok(DialectType::Hive),
237 "spark" | "spark2" => Ok(DialectType::Spark),
238 "trino" => Ok(DialectType::Trino),
239 "presto" => Ok(DialectType::Presto),
240 "redshift" => Ok(DialectType::Redshift),
241 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
242 "oracle" => Ok(DialectType::Oracle),
243 "clickhouse" => Ok(DialectType::ClickHouse),
244 "databricks" => Ok(DialectType::Databricks),
245 "athena" => Ok(DialectType::Athena),
246 "teradata" => Ok(DialectType::Teradata),
247 "doris" => Ok(DialectType::Doris),
248 "starrocks" => Ok(DialectType::StarRocks),
249 "materialize" => Ok(DialectType::Materialize),
250 "risingwave" => Ok(DialectType::RisingWave),
251 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
252 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
253 "tidb" => Ok(DialectType::TiDB),
254 "druid" => Ok(DialectType::Druid),
255 "solr" => Ok(DialectType::Solr),
256 "tableau" => Ok(DialectType::Tableau),
257 "dune" => Ok(DialectType::Dune),
258 "fabric" => Ok(DialectType::Fabric),
259 "drill" => Ok(DialectType::Drill),
260 "dremio" => Ok(DialectType::Dremio),
261 "exasol" => Ok(DialectType::Exasol),
262 _ => Err(crate::error::Error::parse(format!("Unknown dialect: {}", s))),
263 }
264 }
265}
266
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. Defaults to the generic
    /// [`TokenizerConfig`].
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. Defaults to the
    /// generic [`GeneratorConfig`].
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the node unchanged.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
329
330/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
331/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
332///
333/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
334/// and then nested element/field types are recursed into. This ensures that dialect-level
335/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
336fn transform_data_type_recursive<F>(dt: crate::expressions::DataType, transform_fn: &F) -> Result<crate::expressions::DataType>
337where
338 F: Fn(Expression) -> Result<Expression>,
339{
340 use crate::expressions::DataType;
341 // First, transform the outermost type through the expression system
342 let dt_expr = transform_fn(Expression::DataType(dt))?;
343 let dt = match dt_expr {
344 Expression::DataType(d) => d,
345 _ => return Ok(match dt_expr { _ => DataType::Custom { name: "UNKNOWN".to_string() } }),
346 };
347 // Then recurse into nested types
348 match dt {
349 DataType::Array { element_type, dimension } => {
350 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
351 Ok(DataType::Array { element_type: Box::new(inner), dimension })
352 }
353 DataType::List { element_type } => {
354 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
355 Ok(DataType::List { element_type: Box::new(inner) })
356 }
357 DataType::Struct { fields, nested } => {
358 let mut new_fields = Vec::new();
359 for mut field in fields {
360 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
361 new_fields.push(field);
362 }
363 Ok(DataType::Struct { fields: new_fields, nested })
364 }
365 DataType::Map { key_type, value_type } => {
366 let k = transform_data_type_recursive(*key_type, transform_fn)?;
367 let v = transform_data_type_recursive(*value_type, transform_fn)?;
368 Ok(DataType::Map { key_type: Box::new(k), value_type: Box::new(v) })
369 }
370 other => Ok(other),
371 }
372}
373
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// DuckDB and Presto both use C-style `%` directives, but a handful of
/// specifiers differ (e.g. DuckDB `%M` = minutes vs Presto `%i`).
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Multi-character patterns that must be rewritten (or shielded) before the
    // single-specifier pass below: (duckdb pattern, \x01 sentinel, presto form).
    // Order matters — longer patterns are handled first via the sentinel pass
    // so the single-specifier replacements cannot clobber them.
    const PROTECTED: &[(&str, &str, &str)] = &[
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: shield multi-char patterns behind sentinels.
    let mut out = fmt.to_string();
    for &(pattern, sentinel, _) in PROTECTED {
        out = out.replace(pattern, sentinel);
    }

    // Pass 2: convert the lone specifiers that differ between the two engines.
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");

    // Pass 3: restore the shielded patterns as their Presto equivalents.
    for &(_, sentinel, presto) in PROTECTED {
        out = out.replace(sentinel, presto);
    }
    out
}
398
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery uses a mix of strftime-like directives; combined date/time
/// patterns collapse to the shorthand forms `%F` (date) and `%T` (time).
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Ordered replacement table: longer/combined patterns come before their
    // components so e.g. "%Y-%m-%d %H:%M:%S" becomes "%F %T" in one step.
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .iter()
    .copied()
    .fold(fmt.to_string(), |acc, (pattern, replacement)| acc.replace(pattern, replacement))
}
410
411/// Applies a transform function bottom-up through an entire expression tree.
412///
413/// This is the core tree-rewriting engine used by the dialect system. It performs
414/// a post-order (children-first) traversal: for each node, all children are recursively
415/// transformed before the node itself is passed to `transform_fn`. This bottom-up
416/// strategy means that when `transform_fn` sees a node, its children have already
417/// been rewritten, which simplifies pattern matching on sub-expressions.
418///
419/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
420/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
421/// function calls, CASE expressions, date/time functions, and more.
422///
423/// # Arguments
424///
425/// * `expr` - The root expression to transform (consumed).
426/// * `transform_fn` - A closure that receives each expression node (after its children
427/// have been transformed) and returns a possibly-rewritten expression.
428///
429/// # Errors
430///
431/// Returns an error if `transform_fn` returns an error for any node.
432pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
433where
434 F: Fn(Expression) -> Result<Expression>,
435{
436 use crate::expressions::BinaryOp;
437
438 // Helper macro to transform binary ops with Box<BinaryOp>
439 macro_rules! transform_binary {
440 ($variant:ident, $op:expr) => {{
441 let left = transform_recursive($op.left, transform_fn)?;
442 let right = transform_recursive($op.right, transform_fn)?;
443 Expression::$variant(Box::new(BinaryOp {
444 left,
445 right,
446 left_comments: $op.left_comments,
447 operator_comments: $op.operator_comments,
448 trailing_comments: $op.trailing_comments,
449 }))
450 }};
451 }
452
453 // First recursively transform children, then apply the transform function
454 let expr = match expr {
455 Expression::Select(mut select) => {
456 select.expressions = select
457 .expressions
458 .into_iter()
459 .map(|e| transform_recursive(e, transform_fn))
460 .collect::<Result<Vec<_>>>()?;
461
462 // Transform FROM clause
463 if let Some(mut from) = select.from.take() {
464 from.expressions = from
465 .expressions
466 .into_iter()
467 .map(|e| transform_recursive(e, transform_fn))
468 .collect::<Result<Vec<_>>>()?;
469 select.from = Some(from);
470 }
471
472 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
473 select.joins = select
474 .joins
475 .into_iter()
476 .map(|mut join| {
477 join.this = transform_recursive(join.this, transform_fn)?;
478 if let Some(on) = join.on.take() {
479 join.on = Some(transform_recursive(on, transform_fn)?);
480 }
481 // Wrap join in Expression::Join to allow transform_fn to transform it
482 match transform_fn(Expression::Join(Box::new(join)))? {
483 Expression::Join(j) => Ok(*j),
484 _ => Err(crate::error::Error::parse("Join transformation returned non-join expression")),
485 }
486 })
487 .collect::<Result<Vec<_>>>()?;
488
489 // Transform WHERE clause
490 if let Some(mut where_clause) = select.where_clause.take() {
491 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
492 select.where_clause = Some(where_clause);
493 }
494
495 // Transform GROUP BY
496 if let Some(mut group_by) = select.group_by.take() {
497 group_by.expressions = group_by
498 .expressions
499 .into_iter()
500 .map(|e| transform_recursive(e, transform_fn))
501 .collect::<Result<Vec<_>>>()?;
502 select.group_by = Some(group_by);
503 }
504
505 // Transform HAVING
506 if let Some(mut having) = select.having.take() {
507 having.this = transform_recursive(having.this, transform_fn)?;
508 select.having = Some(having);
509 }
510
511 // Transform WITH (CTEs)
512 if let Some(mut with) = select.with.take() {
513 with.ctes = with.ctes.into_iter().map(|mut cte| {
514 let original = cte.this.clone();
515 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
516 cte
517 }).collect();
518 select.with = Some(with);
519 }
520
521 // Transform ORDER BY
522 if let Some(mut order) = select.order_by.take() {
523 order.expressions = order.expressions.into_iter().map(|o| {
524 let mut o = o;
525 let original = o.this.clone();
526 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
527 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
528 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
529 Ok(Expression::Ordered(transformed)) => *transformed,
530 Ok(_) | Err(_) => o,
531 }
532 }).collect();
533 select.order_by = Some(order);
534 }
535
536 // Transform WINDOW clause order_by
537 if let Some(ref mut windows) = select.windows {
538 for nw in windows.iter_mut() {
539 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by).into_iter().map(|o| {
540 let mut o = o;
541 let original = o.this.clone();
542 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
543 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
544 Ok(Expression::Ordered(transformed)) => *transformed,
545 Ok(_) | Err(_) => o,
546 }
547 }).collect();
548 }
549 }
550
551 // Transform QUALIFY
552 if let Some(mut qual) = select.qualify.take() {
553 qual.this = transform_recursive(qual.this, transform_fn)?;
554 select.qualify = Some(qual);
555 }
556
557 Expression::Select(select)
558 }
559 Expression::Function(mut f) => {
560 f.args = f
561 .args
562 .into_iter()
563 .map(|e| transform_recursive(e, transform_fn))
564 .collect::<Result<Vec<_>>>()?;
565 Expression::Function(f)
566 }
567 Expression::AggregateFunction(mut f) => {
568 f.args = f
569 .args
570 .into_iter()
571 .map(|e| transform_recursive(e, transform_fn))
572 .collect::<Result<Vec<_>>>()?;
573 if let Some(filter) = f.filter {
574 f.filter = Some(transform_recursive(filter, transform_fn)?);
575 }
576 Expression::AggregateFunction(f)
577 }
578 Expression::WindowFunction(mut wf) => {
579 wf.this = transform_recursive(wf.this, transform_fn)?;
580 wf.over.partition_by = wf
581 .over
582 .partition_by
583 .into_iter()
584 .map(|e| transform_recursive(e, transform_fn))
585 .collect::<Result<Vec<_>>>()?;
586 // Transform order_by items through Expression::Ordered wrapper
587 wf.over.order_by = wf.over.order_by.into_iter().map(|o| {
588 let mut o = o;
589 o.this = transform_recursive(o.this, transform_fn)?;
590 match transform_fn(Expression::Ordered(Box::new(o)))? {
591 Expression::Ordered(transformed) => Ok(*transformed),
592 _ => Err(crate::error::Error::parse("Ordered transformation returned non-Ordered expression")),
593 }
594 }).collect::<Result<Vec<_>>>()?;
595 Expression::WindowFunction(wf)
596 }
597 Expression::Alias(mut a) => {
598 a.this = transform_recursive(a.this, transform_fn)?;
599 Expression::Alias(a)
600 }
601 Expression::Cast(mut c) => {
602 c.this = transform_recursive(c.this, transform_fn)?;
603 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
604 c.to = transform_data_type_recursive(c.to, transform_fn)?;
605 Expression::Cast(c)
606 }
607 Expression::And(op) => transform_binary!(And, *op),
608 Expression::Or(op) => transform_binary!(Or, *op),
609 Expression::Add(op) => transform_binary!(Add, *op),
610 Expression::Sub(op) => transform_binary!(Sub, *op),
611 Expression::Mul(op) => transform_binary!(Mul, *op),
612 Expression::Div(op) => transform_binary!(Div, *op),
613 Expression::Eq(op) => transform_binary!(Eq, *op),
614 Expression::Lt(op) => transform_binary!(Lt, *op),
615 Expression::Gt(op) => transform_binary!(Gt, *op),
616 Expression::Paren(mut p) => {
617 p.this = transform_recursive(p.this, transform_fn)?;
618 Expression::Paren(p)
619 }
620 Expression::Coalesce(mut f) => {
621 f.expressions = f
622 .expressions
623 .into_iter()
624 .map(|e| transform_recursive(e, transform_fn))
625 .collect::<Result<Vec<_>>>()?;
626 Expression::Coalesce(f)
627 }
628 Expression::IfNull(mut f) => {
629 f.this = transform_recursive(f.this, transform_fn)?;
630 f.expression = transform_recursive(f.expression, transform_fn)?;
631 Expression::IfNull(f)
632 }
633 Expression::Nvl(mut f) => {
634 f.this = transform_recursive(f.this, transform_fn)?;
635 f.expression = transform_recursive(f.expression, transform_fn)?;
636 Expression::Nvl(f)
637 }
638 Expression::In(mut i) => {
639 i.this = transform_recursive(i.this, transform_fn)?;
640 i.expressions = i
641 .expressions
642 .into_iter()
643 .map(|e| transform_recursive(e, transform_fn))
644 .collect::<Result<Vec<_>>>()?;
645 if let Some(query) = i.query {
646 i.query = Some(transform_recursive(query, transform_fn)?);
647 }
648 Expression::In(i)
649 }
650 Expression::Not(mut n) => {
651 n.this = transform_recursive(n.this, transform_fn)?;
652 Expression::Not(n)
653 }
654 Expression::ArraySlice(mut s) => {
655 s.this = transform_recursive(s.this, transform_fn)?;
656 if let Some(start) = s.start {
657 s.start = Some(transform_recursive(start, transform_fn)?);
658 }
659 if let Some(end) = s.end {
660 s.end = Some(transform_recursive(end, transform_fn)?);
661 }
662 Expression::ArraySlice(s)
663 }
664 Expression::Subscript(mut s) => {
665 s.this = transform_recursive(s.this, transform_fn)?;
666 s.index = transform_recursive(s.index, transform_fn)?;
667 Expression::Subscript(s)
668 }
669 Expression::Array(mut a) => {
670 a.expressions = a.expressions.into_iter()
671 .map(|e| transform_recursive(e, transform_fn))
672 .collect::<Result<Vec<_>>>()?;
673 Expression::Array(a)
674 }
675 Expression::Struct(mut s) => {
676 let mut new_fields = Vec::new();
677 for (name, expr) in s.fields {
678 let transformed = transform_recursive(expr, transform_fn)?;
679 new_fields.push((name, transformed));
680 }
681 s.fields = new_fields;
682 Expression::Struct(s)
683 }
684 Expression::NamedArgument(mut na) => {
685 na.value = transform_recursive(na.value, transform_fn)?;
686 Expression::NamedArgument(na)
687 }
688 Expression::MapFunc(mut m) => {
689 m.keys = m.keys.into_iter()
690 .map(|e| transform_recursive(e, transform_fn))
691 .collect::<Result<Vec<_>>>()?;
692 m.values = m.values.into_iter()
693 .map(|e| transform_recursive(e, transform_fn))
694 .collect::<Result<Vec<_>>>()?;
695 Expression::MapFunc(m)
696 }
697 Expression::ArrayFunc(mut a) => {
698 a.expressions = a.expressions.into_iter()
699 .map(|e| transform_recursive(e, transform_fn))
700 .collect::<Result<Vec<_>>>()?;
701 Expression::ArrayFunc(a)
702 }
703 Expression::Lambda(mut l) => {
704 l.body = transform_recursive(l.body, transform_fn)?;
705 Expression::Lambda(l)
706 }
707 Expression::JsonExtract(mut f) => {
708 f.this = transform_recursive(f.this, transform_fn)?;
709 f.path = transform_recursive(f.path, transform_fn)?;
710 Expression::JsonExtract(f)
711 }
712 Expression::JsonExtractScalar(mut f) => {
713 f.this = transform_recursive(f.this, transform_fn)?;
714 f.path = transform_recursive(f.path, transform_fn)?;
715 Expression::JsonExtractScalar(f)
716 }
717
718 // ===== UnaryFunc-based expressions =====
719 // These all have a single `this: Expression` child
720 Expression::Length(mut f) => {
721 f.this = transform_recursive(f.this, transform_fn)?;
722 Expression::Length(f)
723 }
724 Expression::Upper(mut f) => {
725 f.this = transform_recursive(f.this, transform_fn)?;
726 Expression::Upper(f)
727 }
728 Expression::Lower(mut f) => {
729 f.this = transform_recursive(f.this, transform_fn)?;
730 Expression::Lower(f)
731 }
732 Expression::LTrim(mut f) => {
733 f.this = transform_recursive(f.this, transform_fn)?;
734 Expression::LTrim(f)
735 }
736 Expression::RTrim(mut f) => {
737 f.this = transform_recursive(f.this, transform_fn)?;
738 Expression::RTrim(f)
739 }
740 Expression::Reverse(mut f) => {
741 f.this = transform_recursive(f.this, transform_fn)?;
742 Expression::Reverse(f)
743 }
744 Expression::Abs(mut f) => {
745 f.this = transform_recursive(f.this, transform_fn)?;
746 Expression::Abs(f)
747 }
748 Expression::Ceil(mut f) => {
749 f.this = transform_recursive(f.this, transform_fn)?;
750 Expression::Ceil(f)
751 }
752 Expression::Floor(mut f) => {
753 f.this = transform_recursive(f.this, transform_fn)?;
754 Expression::Floor(f)
755 }
756 Expression::Sign(mut f) => {
757 f.this = transform_recursive(f.this, transform_fn)?;
758 Expression::Sign(f)
759 }
760 Expression::Sqrt(mut f) => {
761 f.this = transform_recursive(f.this, transform_fn)?;
762 Expression::Sqrt(f)
763 }
764 Expression::Cbrt(mut f) => {
765 f.this = transform_recursive(f.this, transform_fn)?;
766 Expression::Cbrt(f)
767 }
768 Expression::Ln(mut f) => {
769 f.this = transform_recursive(f.this, transform_fn)?;
770 Expression::Ln(f)
771 }
772 Expression::Exp(mut f) => {
773 f.this = transform_recursive(f.this, transform_fn)?;
774 Expression::Exp(f)
775 }
776 Expression::Date(mut f) => {
777 f.this = transform_recursive(f.this, transform_fn)?;
778 Expression::Date(f)
779 }
780 Expression::Stddev(mut f) => {
781 f.this = transform_recursive(f.this, transform_fn)?;
782 Expression::Stddev(f)
783 }
784 Expression::Variance(mut f) => {
785 f.this = transform_recursive(f.this, transform_fn)?;
786 Expression::Variance(f)
787 }
788
789 // ===== BinaryFunc-based expressions =====
790 Expression::ModFunc(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 f.expression = transform_recursive(f.expression, transform_fn)?;
793 Expression::ModFunc(f)
794 }
795 Expression::Power(mut f) => {
796 f.this = transform_recursive(f.this, transform_fn)?;
797 f.expression = transform_recursive(f.expression, transform_fn)?;
798 Expression::Power(f)
799 }
800 Expression::MapFromArrays(mut f) => {
801 f.this = transform_recursive(f.this, transform_fn)?;
802 f.expression = transform_recursive(f.expression, transform_fn)?;
803 Expression::MapFromArrays(f)
804 }
805 Expression::ElementAt(mut f) => {
806 f.this = transform_recursive(f.this, transform_fn)?;
807 f.expression = transform_recursive(f.expression, transform_fn)?;
808 Expression::ElementAt(f)
809 }
810 Expression::MapContainsKey(mut f) => {
811 f.this = transform_recursive(f.this, transform_fn)?;
812 f.expression = transform_recursive(f.expression, transform_fn)?;
813 Expression::MapContainsKey(f)
814 }
815 Expression::Left(mut f) => {
816 f.this = transform_recursive(f.this, transform_fn)?;
817 f.length = transform_recursive(f.length, transform_fn)?;
818 Expression::Left(f)
819 }
820 Expression::Right(mut f) => {
821 f.this = transform_recursive(f.this, transform_fn)?;
822 f.length = transform_recursive(f.length, transform_fn)?;
823 Expression::Right(f)
824 }
825 Expression::Repeat(mut f) => {
826 f.this = transform_recursive(f.this, transform_fn)?;
827 f.times = transform_recursive(f.times, transform_fn)?;
828 Expression::Repeat(f)
829 }
830
831 // ===== Complex function expressions =====
832 Expression::Substring(mut f) => {
833 f.this = transform_recursive(f.this, transform_fn)?;
834 f.start = transform_recursive(f.start, transform_fn)?;
835 if let Some(len) = f.length {
836 f.length = Some(transform_recursive(len, transform_fn)?);
837 }
838 Expression::Substring(f)
839 }
840 Expression::Replace(mut f) => {
841 f.this = transform_recursive(f.this, transform_fn)?;
842 f.old = transform_recursive(f.old, transform_fn)?;
843 f.new = transform_recursive(f.new, transform_fn)?;
844 Expression::Replace(f)
845 }
846 Expression::ConcatWs(mut f) => {
847 f.separator = transform_recursive(f.separator, transform_fn)?;
848 f.expressions = f.expressions.into_iter()
849 .map(|e| transform_recursive(e, transform_fn))
850 .collect::<Result<Vec<_>>>()?;
851 Expression::ConcatWs(f)
852 }
853 Expression::Trim(mut f) => {
854 f.this = transform_recursive(f.this, transform_fn)?;
855 if let Some(chars) = f.characters {
856 f.characters = Some(transform_recursive(chars, transform_fn)?);
857 }
858 Expression::Trim(f)
859 }
860 Expression::Split(mut f) => {
861 f.this = transform_recursive(f.this, transform_fn)?;
862 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
863 Expression::Split(f)
864 }
865 Expression::Lpad(mut f) => {
866 f.this = transform_recursive(f.this, transform_fn)?;
867 f.length = transform_recursive(f.length, transform_fn)?;
868 if let Some(fill) = f.fill {
869 f.fill = Some(transform_recursive(fill, transform_fn)?);
870 }
871 Expression::Lpad(f)
872 }
873 Expression::Rpad(mut f) => {
874 f.this = transform_recursive(f.this, transform_fn)?;
875 f.length = transform_recursive(f.length, transform_fn)?;
876 if let Some(fill) = f.fill {
877 f.fill = Some(transform_recursive(fill, transform_fn)?);
878 }
879 Expression::Rpad(f)
880 }
881
882 // ===== Conditional expressions =====
883 Expression::Case(mut c) => {
884 if let Some(operand) = c.operand {
885 c.operand = Some(transform_recursive(operand, transform_fn)?);
886 }
887 c.whens = c.whens.into_iter().map(|(cond, then)| {
888 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
889 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
890 (new_cond, new_then)
891 }).collect();
892 if let Some(else_expr) = c.else_ {
893 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
894 }
895 Expression::Case(c)
896 }
897 Expression::IfFunc(mut f) => {
898 f.condition = transform_recursive(f.condition, transform_fn)?;
899 f.true_value = transform_recursive(f.true_value, transform_fn)?;
900 if let Some(false_val) = f.false_value {
901 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
902 }
903 Expression::IfFunc(f)
904 }
905
906 // ===== Date/Time expressions =====
907 Expression::DateAdd(mut f) => {
908 f.this = transform_recursive(f.this, transform_fn)?;
909 f.interval = transform_recursive(f.interval, transform_fn)?;
910 Expression::DateAdd(f)
911 }
912 Expression::DateSub(mut f) => {
913 f.this = transform_recursive(f.this, transform_fn)?;
914 f.interval = transform_recursive(f.interval, transform_fn)?;
915 Expression::DateSub(f)
916 }
917 Expression::DateDiff(mut f) => {
918 f.this = transform_recursive(f.this, transform_fn)?;
919 f.expression = transform_recursive(f.expression, transform_fn)?;
920 Expression::DateDiff(f)
921 }
922 Expression::DateTrunc(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::DateTrunc(f)
925 }
926 Expression::Extract(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 Expression::Extract(f)
929 }
930
931 // ===== JSON expressions =====
932 Expression::JsonObject(mut f) => {
933 f.pairs = f.pairs.into_iter().map(|(k, v)| {
934 let new_k = transform_recursive(k, transform_fn)?;
935 let new_v = transform_recursive(v, transform_fn)?;
936 Ok((new_k, new_v))
937 }).collect::<Result<Vec<_>>>()?;
938 Expression::JsonObject(f)
939 }
940
941 // ===== Subquery expressions =====
942 Expression::Subquery(mut s) => {
943 s.this = transform_recursive(s.this, transform_fn)?;
944 Expression::Subquery(s)
945 }
946 Expression::Exists(mut e) => {
947 e.this = transform_recursive(e.this, transform_fn)?;
948 Expression::Exists(e)
949 }
950
951 // ===== Set operations =====
952 Expression::Union(mut u) => {
953 u.left = transform_recursive(u.left, transform_fn)?;
954 u.right = transform_recursive(u.right, transform_fn)?;
955 Expression::Union(u)
956 }
957 Expression::Intersect(mut i) => {
958 i.left = transform_recursive(i.left, transform_fn)?;
959 i.right = transform_recursive(i.right, transform_fn)?;
960 Expression::Intersect(i)
961 }
962 Expression::Except(mut e) => {
963 e.left = transform_recursive(e.left, transform_fn)?;
964 e.right = transform_recursive(e.right, transform_fn)?;
965 Expression::Except(e)
966 }
967
968 // ===== DML expressions =====
969 Expression::Insert(mut ins) => {
970 // Transform VALUES clause expressions
971 let mut new_values = Vec::new();
972 for row in ins.values {
973 let mut new_row = Vec::new();
974 for e in row {
975 new_row.push(transform_recursive(e, transform_fn)?);
976 }
977 new_values.push(new_row);
978 }
979 ins.values = new_values;
980
981 // Transform query (for INSERT ... SELECT)
982 if let Some(query) = ins.query {
983 ins.query = Some(transform_recursive(query, transform_fn)?);
984 }
985
986 // Transform RETURNING clause
987 let mut new_returning = Vec::new();
988 for e in ins.returning {
989 new_returning.push(transform_recursive(e, transform_fn)?);
990 }
991 ins.returning = new_returning;
992
993 // Transform ON CONFLICT clause
994 if let Some(on_conflict) = ins.on_conflict {
995 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
996 }
997
998 Expression::Insert(ins)
999 }
1000 Expression::Update(mut upd) => {
1001 upd.set = upd.set.into_iter().map(|(id, val)| {
1002 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1003 (id, new_val)
1004 }).collect();
1005 if let Some(mut where_clause) = upd.where_clause.take() {
1006 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1007 upd.where_clause = Some(where_clause);
1008 }
1009 Expression::Update(upd)
1010 }
1011 Expression::Delete(mut del) => {
1012 if let Some(mut where_clause) = del.where_clause.take() {
1013 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1014 del.where_clause = Some(where_clause);
1015 }
1016 Expression::Delete(del)
1017 }
1018
1019 // ===== CTE expressions =====
1020 Expression::With(mut w) => {
1021 w.ctes = w.ctes.into_iter().map(|mut cte| {
1022 let original = cte.this.clone();
1023 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1024 cte
1025 }).collect();
1026 Expression::With(w)
1027 }
1028 Expression::Cte(mut c) => {
1029 c.this = transform_recursive(c.this, transform_fn)?;
1030 Expression::Cte(c)
1031 }
1032
1033 // ===== Order expressions =====
1034 Expression::Ordered(mut o) => {
1035 o.this = transform_recursive(o.this, transform_fn)?;
1036 Expression::Ordered(o)
1037 }
1038
1039 // ===== Negation =====
1040 Expression::Neg(mut n) => {
1041 n.this = transform_recursive(n.this, transform_fn)?;
1042 Expression::Neg(n)
1043 }
1044
1045 // ===== Between =====
1046 Expression::Between(mut b) => {
1047 b.this = transform_recursive(b.this, transform_fn)?;
1048 b.low = transform_recursive(b.low, transform_fn)?;
1049 b.high = transform_recursive(b.high, transform_fn)?;
1050 Expression::Between(b)
1051 }
1052
1053 // ===== Like expressions =====
1054 Expression::Like(mut l) => {
1055 l.left = transform_recursive(l.left, transform_fn)?;
1056 l.right = transform_recursive(l.right, transform_fn)?;
1057 Expression::Like(l)
1058 }
1059 Expression::ILike(mut l) => {
1060 l.left = transform_recursive(l.left, transform_fn)?;
1061 l.right = transform_recursive(l.right, transform_fn)?;
1062 Expression::ILike(l)
1063 }
1064
1065 // ===== Additional binary ops not covered by macro =====
1066 Expression::Neq(op) => transform_binary!(Neq, *op),
1067 Expression::Lte(op) => transform_binary!(Lte, *op),
1068 Expression::Gte(op) => transform_binary!(Gte, *op),
1069 Expression::Mod(op) => transform_binary!(Mod, *op),
1070 Expression::Concat(op) => transform_binary!(Concat, *op),
1071 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1072 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1073 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1074 Expression::Is(op) => transform_binary!(Is, *op),
1075
1076 // ===== TryCast / SafeCast =====
1077 Expression::TryCast(mut c) => {
1078 c.this = transform_recursive(c.this, transform_fn)?;
1079 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1080 Expression::TryCast(c)
1081 }
1082 Expression::SafeCast(mut c) => {
1083 c.this = transform_recursive(c.this, transform_fn)?;
1084 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1085 Expression::SafeCast(c)
1086 }
1087
1088 // ===== Misc =====
1089 Expression::Unnest(mut f) => {
1090 f.this = transform_recursive(f.this, transform_fn)?;
1091 f.expressions = f.expressions.into_iter()
1092 .map(|e| transform_recursive(e, transform_fn))
1093 .collect::<Result<Vec<_>>>()?;
1094 Expression::Unnest(f)
1095 }
1096 Expression::Explode(mut f) => {
1097 f.this = transform_recursive(f.this, transform_fn)?;
1098 Expression::Explode(f)
1099 }
1100 Expression::GroupConcat(mut f) => {
1101 f.this = transform_recursive(f.this, transform_fn)?;
1102 Expression::GroupConcat(f)
1103 }
1104 Expression::StringAgg(mut f) => {
1105 f.this = transform_recursive(f.this, transform_fn)?;
1106 Expression::StringAgg(f)
1107 }
1108 Expression::ListAgg(mut f) => {
1109 f.this = transform_recursive(f.this, transform_fn)?;
1110 Expression::ListAgg(f)
1111 }
1112 Expression::ArrayAgg(mut f) => {
1113 f.this = transform_recursive(f.this, transform_fn)?;
1114 Expression::ArrayAgg(f)
1115 }
1116 Expression::ParseJson(mut f) => {
1117 f.this = transform_recursive(f.this, transform_fn)?;
1118 Expression::ParseJson(f)
1119 }
1120 Expression::ToJson(mut f) => {
1121 f.this = transform_recursive(f.this, transform_fn)?;
1122 Expression::ToJson(f)
1123 }
1124 Expression::JSONExtract(mut e) => {
1125 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1126 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1127 Expression::JSONExtract(e)
1128 }
1129 Expression::JSONExtractScalar(mut e) => {
1130 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1131 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1132 Expression::JSONExtractScalar(e)
1133 }
1134
1135 // StrToTime: recurse into this
1136 Expression::StrToTime(mut e) => {
1137 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1138 Expression::StrToTime(e)
1139 }
1140
1141 // UnixToTime: recurse into this
1142 Expression::UnixToTime(mut e) => {
1143 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1144 Expression::UnixToTime(e)
1145 }
1146
1147 // CreateTable: recurse into column defaults, on_update expressions, and data types
1148 Expression::CreateTable(mut ct) => {
1149 for col in &mut ct.columns {
1150 if let Some(default_expr) = col.default.take() {
1151 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1152 }
1153 if let Some(on_update_expr) = col.on_update.take() {
1154 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1155 }
1156 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1157 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1158 // contexts and may not produce correct results for DDL column definitions.
1159 // The DDL type mappings would need dedicated handling per source/target pair.
1160 }
1161 if let Some(as_select) = ct.as_select.take() {
1162 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1163 }
1164 Expression::CreateTable(ct)
1165 }
1166
1167 // CreateProcedure: recurse into body expressions
1168 Expression::CreateProcedure(mut cp) => {
1169 if let Some(body) = cp.body.take() {
1170 cp.body = Some(match body {
1171 FunctionBody::Expression(expr) => {
1172 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1173 }
1174 FunctionBody::Return(expr) => {
1175 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1176 }
1177 FunctionBody::Statements(stmts) => {
1178 let transformed_stmts = stmts
1179 .into_iter()
1180 .map(|s| transform_recursive(s, transform_fn))
1181 .collect::<Result<Vec<_>>>()?;
1182 FunctionBody::Statements(transformed_stmts)
1183 }
1184 other => other,
1185 });
1186 }
1187 Expression::CreateProcedure(cp)
1188 }
1189
1190 // CreateFunction: recurse into body expressions
1191 Expression::CreateFunction(mut cf) => {
1192 if let Some(body) = cf.body.take() {
1193 cf.body = Some(match body {
1194 FunctionBody::Expression(expr) => {
1195 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1196 }
1197 FunctionBody::Return(expr) => {
1198 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1199 }
1200 FunctionBody::Statements(stmts) => {
1201 let transformed_stmts = stmts
1202 .into_iter()
1203 .map(|s| transform_recursive(s, transform_fn))
1204 .collect::<Result<Vec<_>>>()?;
1205 FunctionBody::Statements(transformed_stmts)
1206 }
1207 other => other,
1208 });
1209 }
1210 Expression::CreateFunction(cf)
1211 }
1212
1213 // MemberOf: recurse into left and right operands
1214 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1215 // ArrayContainsAll (@>): recurse into left and right operands
1216 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1217 // ArrayContainedBy (<@): recurse into left and right operands
1218 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1219 // ArrayOverlaps (&&): recurse into left and right operands
1220 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1221 // TsMatch (@@): recurse into left and right operands
1222 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1223 // Adjacent (-|-): recurse into left and right operands
1224 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1225
1226 // Table: recurse into when (HistoricalData) and changes fields
1227 Expression::Table(mut t) => {
1228 if let Some(when) = t.when.take() {
1229 let transformed = transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1230 if let Expression::HistoricalData(hd) = transformed {
1231 t.when = Some(hd);
1232 }
1233 }
1234 if let Some(changes) = t.changes.take() {
1235 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1236 if let Expression::Changes(c) = transformed {
1237 t.changes = Some(c);
1238 }
1239 }
1240 Expression::Table(t)
1241 }
1242
1243 // HistoricalData (Snowflake time travel): recurse into expression
1244 Expression::HistoricalData(mut hd) => {
1245 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1246 Expression::HistoricalData(hd)
1247 }
1248
1249 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1250 Expression::Changes(mut c) => {
1251 if let Some(at_before) = c.at_before.take() {
1252 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1253 }
1254 if let Some(end) = c.end.take() {
1255 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1256 }
1257 Expression::Changes(c)
1258 }
1259
1260 // TableArgument: TABLE(expr) or MODEL(expr)
1261 Expression::TableArgument(mut ta) => {
1262 ta.this = transform_recursive(ta.this, transform_fn)?;
1263 Expression::TableArgument(ta)
1264 }
1265
1266 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1267 Expression::JoinedTable(mut jt) => {
1268 jt.left = transform_recursive(jt.left, transform_fn)?;
1269 for join in &mut jt.joins {
1270 join.this = transform_recursive(std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)), transform_fn)?;
1271 if let Some(on) = join.on.take() {
1272 join.on = Some(transform_recursive(on, transform_fn)?);
1273 }
1274 }
1275 Expression::JoinedTable(jt)
1276 }
1277
1278 // Lateral: LATERAL func() - recurse into the function expression
1279 Expression::Lateral(mut lat) => {
1280 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1281 Expression::Lateral(lat)
1282 }
1283
1284 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1285 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1286 // as a unit together with the WithinGroup wrapper
1287 Expression::WithinGroup(mut wg) => {
1288 wg.order_by = wg.order_by.into_iter().map(|mut o| {
1289 let original = o.this.clone();
1290 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1291 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1292 Ok(Expression::Ordered(transformed)) => *transformed,
1293 Ok(_) | Err(_) => o,
1294 }
1295 }).collect();
1296 Expression::WithinGroup(wg)
1297 }
1298
1299 // Filter: recurse into both the aggregate and the filter condition
1300 Expression::Filter(mut f) => {
1301 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1302 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1303 Expression::Filter(f)
1304 }
1305
1306 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1307 Expression::BitwiseOrAgg(mut f) => {
1308 f.this = transform_recursive(f.this, transform_fn)?;
1309 Expression::BitwiseOrAgg(f)
1310 }
1311 Expression::BitwiseAndAgg(mut f) => {
1312 f.this = transform_recursive(f.this, transform_fn)?;
1313 Expression::BitwiseAndAgg(f)
1314 }
1315 Expression::BitwiseXorAgg(mut f) => {
1316 f.this = transform_recursive(f.this, transform_fn)?;
1317 Expression::BitwiseXorAgg(f)
1318 }
1319
1320 // Pass through leaf nodes unchanged
1321 other => other,
1322 };
1323
1324 // Then apply the transform function
1325 transform_fn(expr)
1326}
1327
1328/// Returns the tokenizer config, generator config, and expression transform closure
1329/// for a built-in dialect type. This is the shared implementation used by both
1330/// `Dialect::get()` and custom dialect construction.
1331fn configs_for_dialect_type(
1332 dt: DialectType,
1333) -> (
1334 TokenizerConfig,
1335 GeneratorConfig,
1336 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
1337) {
1338 macro_rules! dialect_configs {
1339 ($dialect_struct:ident) => {{
1340 let d = $dialect_struct;
1341 (
1342 d.tokenizer_config(),
1343 d.generator_config(),
1344 Box::new(move |e| $dialect_struct.transform_expr(e)),
1345 )
1346 }};
1347 }
1348 match dt {
1349 DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
1350 DialectType::MySQL => dialect_configs!(MySQLDialect),
1351 DialectType::BigQuery => dialect_configs!(BigQueryDialect),
1352 DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
1353 DialectType::DuckDB => dialect_configs!(DuckDBDialect),
1354 DialectType::TSQL => dialect_configs!(TSQLDialect),
1355 DialectType::Oracle => dialect_configs!(OracleDialect),
1356 DialectType::Hive => dialect_configs!(HiveDialect),
1357 DialectType::Spark => dialect_configs!(SparkDialect),
1358 DialectType::SQLite => dialect_configs!(SQLiteDialect),
1359 DialectType::Presto => dialect_configs!(PrestoDialect),
1360 DialectType::Trino => dialect_configs!(TrinoDialect),
1361 DialectType::Redshift => dialect_configs!(RedshiftDialect),
1362 DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
1363 DialectType::Databricks => dialect_configs!(DatabricksDialect),
1364 DialectType::Athena => dialect_configs!(AthenaDialect),
1365 DialectType::Teradata => dialect_configs!(TeradataDialect),
1366 DialectType::Doris => dialect_configs!(DorisDialect),
1367 DialectType::StarRocks => dialect_configs!(StarRocksDialect),
1368 DialectType::Materialize => dialect_configs!(MaterializeDialect),
1369 DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
1370 DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
1371 DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
1372 DialectType::TiDB => dialect_configs!(TiDBDialect),
1373 DialectType::Druid => dialect_configs!(DruidDialect),
1374 DialectType::Solr => dialect_configs!(SolrDialect),
1375 DialectType::Tableau => dialect_configs!(TableauDialect),
1376 DialectType::Dune => dialect_configs!(DuneDialect),
1377 DialectType::Fabric => dialect_configs!(FabricDialect),
1378 DialectType::Drill => dialect_configs!(DrillDialect),
1379 DialectType::Dremio => dialect_configs!(DremioDialect),
1380 DialectType::Exasol => dialect_configs!(ExasolDialect),
1381 _ => dialect_configs!(GenericDialect),
1382 }
1383}
1384
1385// ---------------------------------------------------------------------------
1386// Custom dialect registry
1387// ---------------------------------------------------------------------------
1388
/// Global registry of user-defined dialects, keyed by dialect name.
///
/// Lookups (`get_custom_dialect_config`) take a read lock; registration and
/// removal take a write lock. Initialized lazily on first access.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1391
/// Stored configuration for a user-registered custom dialect.
///
/// Built by [`CustomDialectBuilder::register`] and held in the global registry;
/// [`Dialect::get_by_name`] converts it into a usable [`Dialect`].
struct CustomDialectConfig {
    /// Registry key; must not collide with a built-in dialect name.
    name: String,
    /// Built-in dialect whose configuration this custom dialect inherits.
    base_dialect: DialectType,
    /// Tokenizer configuration (inherited from the base, possibly modified).
    tokenizer_config: TokenizerConfig,
    /// Generator configuration (inherited from the base, possibly modified).
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; when set, replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass; when set, replaces the base dialect's preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1400
1401/// Fluent builder for creating and registering custom SQL dialects.
1402///
1403/// A custom dialect is based on an existing built-in dialect and allows selective
1404/// overrides of tokenizer configuration, generator configuration, and expression
1405/// transforms.
1406///
1407/// # Example
1408///
1409/// ```rust,ignore
1410/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1411/// use polyglot_sql::generator::NormalizeFunctions;
1412///
1413/// CustomDialectBuilder::new("my_postgres")
1414/// .based_on(DialectType::PostgreSQL)
1415/// .generator_config_modifier(|gc| {
1416/// gc.normalize_functions = NormalizeFunctions::Lower;
1417/// })
1418/// .register()
1419/// .unwrap();
1420///
1421/// let d = Dialect::get_by_name("my_postgres").unwrap();
1422/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1423/// let sql = d.generate(&exprs[0]).unwrap();
1424/// assert_eq!(sql, "select count(*)");
1425///
1426/// polyglot_sql::unregister_custom_dialect("my_postgres");
1427/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect supplying the initial tokenizer/generator configuration.
    base_dialect: DialectType,
    /// One-shot closure applied to the inherited tokenizer config at register time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot closure applied to the inherited generator config at register time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform; replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass; replaces the base dialect's preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1436
1437impl CustomDialectBuilder {
1438 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1439 pub fn new(name: impl Into<String>) -> Self {
1440 Self {
1441 name: name.into(),
1442 base_dialect: DialectType::Generic,
1443 tokenizer_modifier: None,
1444 generator_modifier: None,
1445 transform: None,
1446 preprocess: None,
1447 }
1448 }
1449
1450 /// Set the base built-in dialect to inherit configuration from.
1451 pub fn based_on(mut self, dialect: DialectType) -> Self {
1452 self.base_dialect = dialect;
1453 self
1454 }
1455
1456 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1457 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1458 where
1459 F: FnOnce(&mut TokenizerConfig) + 'static,
1460 {
1461 self.tokenizer_modifier = Some(Box::new(f));
1462 self
1463 }
1464
1465 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1466 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1467 where
1468 F: FnOnce(&mut GeneratorConfig) + 'static,
1469 {
1470 self.generator_modifier = Some(Box::new(f));
1471 self
1472 }
1473
1474 /// Set a custom per-node expression transform function.
1475 ///
1476 /// This replaces the base dialect's transform. It is called on every expression
1477 /// node during the recursive transform pass.
1478 pub fn transform_fn<F>(mut self, f: F) -> Self
1479 where
1480 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1481 {
1482 self.transform = Some(Arc::new(f));
1483 self
1484 }
1485
1486 /// Set a custom whole-tree preprocessing function.
1487 ///
1488 /// This replaces the base dialect's built-in preprocessing. It is called once
1489 /// on the entire expression tree before the recursive per-node transform.
1490 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1491 where
1492 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1493 {
1494 self.preprocess = Some(Arc::new(f));
1495 self
1496 }
1497
1498 /// Build the custom dialect configuration and register it in the global registry.
1499 ///
1500 /// Returns an error if:
1501 /// - The name collides with a built-in dialect name
1502 /// - A custom dialect with the same name is already registered
1503 pub fn register(self) -> Result<()> {
1504 // Reject names that collide with built-in dialects
1505 if DialectType::from_str(&self.name).is_ok() {
1506 return Err(crate::error::Error::parse(format!(
1507 "Cannot register custom dialect '{}': name collides with built-in dialect",
1508 self.name
1509 )));
1510 }
1511
1512 // Get base configs
1513 let (mut tok_config, mut gen_config, _base_transform) =
1514 configs_for_dialect_type(self.base_dialect);
1515
1516 // Apply modifiers
1517 if let Some(tok_mod) = self.tokenizer_modifier {
1518 tok_mod(&mut tok_config);
1519 }
1520 if let Some(gen_mod) = self.generator_modifier {
1521 gen_mod(&mut gen_config);
1522 }
1523
1524 let config = CustomDialectConfig {
1525 name: self.name.clone(),
1526 base_dialect: self.base_dialect,
1527 tokenizer_config: tok_config,
1528 generator_config: gen_config,
1529 transform: self.transform,
1530 preprocess: self.preprocess,
1531 };
1532
1533 register_custom_dialect(config)
1534 }
1535}
1536
1537use std::str::FromStr;
1538
1539fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1540 let mut registry = CUSTOM_DIALECT_REGISTRY
1541 .write()
1542 .map_err(|e| crate::error::Error::parse(format!("Registry lock poisoned: {}", e)))?;
1543
1544 if registry.contains_key(&config.name) {
1545 return Err(crate::error::Error::parse(format!(
1546 "Custom dialect '{}' is already registered",
1547 config.name
1548 )));
1549 }
1550
1551 registry.insert(config.name.clone(), Arc::new(config));
1552 Ok(())
1553}
1554
1555/// Remove a custom dialect from the global registry.
1556///
1557/// Returns `true` if a dialect with that name was found and removed,
1558/// `false` if no such custom dialect existed.
1559pub fn unregister_custom_dialect(name: &str) -> bool {
1560 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1561 registry.remove(name).is_some()
1562 } else {
1563 false
1564 }
1565}
1566
1567fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1568 CUSTOM_DIALECT_REGISTRY
1569 .read()
1570 .ok()
1571 .and_then(|registry| registry.get(name).cloned())
1572}
1573
1574/// Main entry point for dialect-specific SQL operations.
1575///
1576/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
1577/// transformer for a specific SQL database engine. It is the high-level API through
1578/// which callers parse, generate, transform, and transpile SQL.
1579///
1580/// # Usage
1581///
1582/// ```rust,ignore
1583/// use polyglot_sql::dialects::{Dialect, DialectType};
1584///
1585/// // Parse PostgreSQL SQL into an AST
1586/// let pg = Dialect::get(DialectType::PostgreSQL);
1587/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
1588///
1589/// // Transpile from PostgreSQL to BigQuery
1590/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
1591/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
1592/// ```
1593///
1594/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
1595/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The dialect this instance represents; for custom dialects, the base built-in dialect.
    dialect_type: DialectType,
    /// Tokenizer preconfigured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Default generator configuration used when rendering SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression transform for this dialect.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1606
1607impl Dialect {
1608 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1609 ///
1610 /// This is the primary constructor. It initializes the tokenizer, generator config,
1611 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1612 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1613 /// config routing.
1614 pub fn get(dialect_type: DialectType) -> Self {
1615 let (tokenizer_config, generator_config, transformer) = configs_for_dialect_type(dialect_type);
1616
1617 // Set up expression-specific generator config for hybrid dialects
1618 let generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>> = match dialect_type {
1619 DialectType::Athena => Some(Box::new(|expr| AthenaDialect.generator_config_for_expr(expr))),
1620 _ => None,
1621 };
1622
1623 Self {
1624 dialect_type,
1625 tokenizer: Tokenizer::new(tokenizer_config),
1626 generator_config,
1627 transformer,
1628 generator_config_for_expr,
1629 custom_preprocess: None,
1630 }
1631 }
1632
1633 /// Look up a dialect by string name.
1634 ///
1635 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1636 /// falls back to the custom dialect registry. Returns `None` if no dialect
1637 /// with the given name exists.
1638 pub fn get_by_name(name: &str) -> Option<Self> {
1639 // Try built-in first
1640 if let Ok(dt) = DialectType::from_str(name) {
1641 return Some(Self::get(dt));
1642 }
1643
1644 // Try custom registry
1645 let config = get_custom_dialect_config(name)?;
1646 Some(Self::from_custom_config(&config))
1647 }
1648
1649 /// Construct a `Dialect` from a custom dialect configuration.
1650 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1651 // Build the transformer: use custom if provided, else use base dialect's
1652 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1653 if let Some(ref custom_transform) = config.transform {
1654 let t = Arc::clone(custom_transform);
1655 Box::new(move |e| t(e))
1656 } else {
1657 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1658 base_transform
1659 };
1660
1661 // Build the custom preprocess: use custom if provided
1662 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1663 config.preprocess.as_ref().map(|p| {
1664 let p = Arc::clone(p);
1665 Box::new(move |e: Expression| p(e)) as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1666 });
1667
1668 Self {
1669 dialect_type: config.base_dialect,
1670 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1671 generator_config: config.generator_config.clone(),
1672 transformer,
1673 generator_config_for_expr: None,
1674 custom_preprocess,
1675 }
1676 }
1677
    /// Returns the [`DialectType`] this instance was built for.
    /// For custom dialects this is the base dialect they inherit from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1682
    /// Returns this dialect's default generator configuration.
    ///
    /// Note: hybrid dialects may override this per expression via the
    /// `generator_config_for_expr` hook; generation paths resolve the
    /// effective config through `get_config_for_expr`.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1687
1688 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1689 ///
1690 /// The input may contain multiple semicolon-separated statements; each one
1691 /// produces a separate element in the returned vector. Tokenization uses
1692 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1693 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1694 let tokens = self.tokenizer.tokenize(sql)?;
1695 let config = crate::parser::ParserConfig {
1696 dialect: Some(self.dialect_type),
1697 ..Default::default()
1698 };
1699 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1700 parser.parse()
1701 }
1702
1703 /// Get the generator config for a specific expression (supports hybrid dialects)
1704 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1705 if let Some(ref config_fn) = self.generator_config_for_expr {
1706 config_fn(expr)
1707 } else {
1708 self.generator_config.clone()
1709 }
1710 }
1711
1712 /// Generates a SQL string from an [`Expression`] AST node.
1713 ///
1714 /// The output uses this dialect's generator configuration for identifier quoting,
1715 /// keyword casing, function name normalization, and syntax style. The result is
1716 /// a single-line (non-pretty) SQL string.
1717 pub fn generate(&self, expr: &Expression) -> Result<String> {
1718 let config = self.get_config_for_expr(expr);
1719 let mut generator = Generator::with_config(config);
1720 generator.generate(expr)
1721 }
1722
1723 /// Generate SQL from an expression with pretty printing enabled
1724 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1725 let mut config = self.get_config_for_expr(expr);
1726 config.pretty = true;
1727 let mut generator = Generator::with_config(config);
1728 generator.generate(expr)
1729 }
1730
1731 /// Generate SQL from an expression with forced identifier quoting (identify=True)
1732 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
1733 let mut config = self.get_config_for_expr(expr);
1734 config.always_quote_identifiers = true;
1735 let mut generator = Generator::with_config(config);
1736 generator.generate(expr)
1737 }
1738
1739 /// Generate SQL from an expression with pretty printing and forced identifier quoting
1740 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
1741 let mut config = self.generator_config.clone();
1742 config.pretty = true;
1743 config.always_quote_identifiers = true;
1744 let mut generator = Generator::with_config(config);
1745 generator.generate(expr)
1746 }
1747
1748 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
1749 ///
1750 /// The transformation proceeds in two phases:
1751 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
1752 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
1753 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
1754 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
1755 ///
1756 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
1757 /// and for identity transforms (normalizing SQL within the same dialect).
1758 pub fn transform(&self, expr: Expression) -> Result<Expression> {
1759 // Apply preprocessing transforms based on dialect
1760 let preprocessed = self.preprocess(expr)?;
1761 // Then apply recursive transformation
1762 transform_recursive(preprocessed, &self.transformer)
1763 }
1764
1765 /// Apply dialect-specific preprocessing transforms
1766 fn preprocess(&self, expr: Expression) -> Result<Expression> {
1767 // If a custom preprocess function is set, use it instead of the built-in logic
1768 if let Some(ref custom_preprocess) = self.custom_preprocess {
1769 return custom_preprocess(expr);
1770 }
1771
1772 use crate::transforms;
1773
1774 match self.dialect_type {
1775 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
1776 DialectType::MySQL => {
1777 let expr = transforms::eliminate_qualify(expr)?;
1778 let expr = transforms::eliminate_full_outer_join(expr)?;
1779 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1780 Ok(expr)
1781 }
1782 // PostgreSQL doesn't support QUALIFY
1783 DialectType::PostgreSQL => {
1784 let expr = transforms::eliminate_qualify(expr)?;
1785 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1786 Ok(expr)
1787 }
1788 // BigQuery doesn't support DISTINCT ON or CTE column aliases
1789 DialectType::BigQuery => {
1790 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1791 let expr = transforms::pushdown_cte_column_names(expr)?;
1792 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
1793 Ok(expr)
1794 }
1795 // Snowflake
1796 DialectType::Snowflake => {
1797 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1798 let expr = transforms::eliminate_window_clause(expr)?;
1799 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
1800 Ok(expr)
1801 }
1802 // TSQL doesn't support QUALIFY
1803 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
1804 DialectType::TSQL => {
1805 let expr = transforms::eliminate_qualify(expr)?;
1806 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1807 let expr = transforms::ensure_bools(expr)?;
1808 Ok(expr)
1809 }
1810 // Spark doesn't support QUALIFY (but Databricks does)
1811 DialectType::Spark => {
1812 let expr = transforms::eliminate_qualify(expr)?;
1813 let expr = transforms::add_auto_table_alias(expr)?;
1814 let expr = transforms::simplify_nested_paren_values(expr)?;
1815 Ok(expr)
1816 }
1817 // Databricks supports QUALIFY natively
1818 DialectType::Databricks => {
1819 let expr = transforms::add_auto_table_alias(expr)?;
1820 let expr = transforms::simplify_nested_paren_values(expr)?;
1821 Ok(expr)
1822 }
1823 // Hive doesn't support QUALIFY
1824 DialectType::Hive => {
1825 let expr = transforms::eliminate_qualify(expr)?;
1826 Ok(expr)
1827 }
1828 // SQLite doesn't support QUALIFY
1829 DialectType::SQLite => {
1830 let expr = transforms::eliminate_qualify(expr)?;
1831 Ok(expr)
1832 }
1833 // Trino doesn't support QUALIFY
1834 DialectType::Trino => {
1835 let expr = transforms::eliminate_qualify(expr)?;
1836 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
1837 Ok(expr)
1838 }
1839 // Presto doesn't support QUALIFY or WINDOW clause
1840 DialectType::Presto => {
1841 let expr = transforms::eliminate_qualify(expr)?;
1842 let expr = transforms::eliminate_window_clause(expr)?;
1843 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
1844 Ok(expr)
1845 }
1846 // DuckDB supports QUALIFY - no elimination needed
1847 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
1848 DialectType::DuckDB => {
1849 let expr = transforms::expand_posexplode_duckdb(expr)?;
1850 Ok(expr)
1851 }
1852 // Redshift doesn't support QUALIFY or WINDOW clause
1853 DialectType::Redshift => {
1854 let expr = transforms::eliminate_qualify(expr)?;
1855 let expr = transforms::eliminate_window_clause(expr)?;
1856 Ok(expr)
1857 }
1858 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
1859 DialectType::StarRocks => {
1860 let expr = transforms::eliminate_qualify(expr)?;
1861 let expr = transforms::expand_between_in_delete(expr)?;
1862 Ok(expr)
1863 }
1864 // Oracle - no special preprocessing needed
1865 DialectType::Oracle => {
1866 Ok(expr)
1867 }
1868 // Drill - no special preprocessing needed
1869 DialectType::Drill => {
1870 Ok(expr)
1871 }
1872 // Teradata - no special preprocessing needed
1873 DialectType::Teradata => {
1874 Ok(expr)
1875 }
1876 // Other dialects - no preprocessing
1877 _ => Ok(expr),
1878 }
1879 }
1880
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` in this (source) dialect, rewrites each statement for
    /// `target`, and returns one single-line SQL string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
1885
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Identical to [`transpile_to`](Self::transpile_to) except the generated
    /// SQL is pretty-printed.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
1890
    /// Shared implementation behind [`transpile_to`](Self::transpile_to) and
    /// [`transpile_to_pretty`](Self::transpile_to_pretty).
    ///
    /// Parses `sql` with this (source) dialect, then for each statement runs a
    /// pipeline of normalizations keyed on the (source, target) dialect pair
    /// before handing the AST to the target dialect's `transform` + `generate`:
    ///
    /// 1. Source-dialect normalization via [`transform`](Self::transform)
    ///    (skipped for identity transpilation and for Generic sources).
    /// 2. A sequence of source/target-specific AST rewrites (TSQL JSON
    ///    unwrapping, BigQuery UNNEST/struct handling, DISTINCT ON
    ///    elimination, etc.). The ordering of these steps is significant --
    ///    see the inline comments on each step.
    /// 3. Target-dialect transform and SQL generation (pretty or single-line),
    ///    plus a Snowflake-specific pretty-print fixup.
    ///
    /// Returns one SQL string per parsed statement.
    fn transpile_to_inner(&self, sql: &str, target: DialectType, pretty: bool) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);

        expressions
            .into_iter()
            .map(|expr| {
                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized = if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                    self.transform(expr)?
                } else {
                    expr
                };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized = if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                if let (Expression::Function(ref jq), Expression::Function(ref jv)) = (&f.args[0], &f.args[1]) {
                                    if jq.name.eq_ignore_ascii_case("JSON_QUERY") && jv.name.eq_ignore_ascii_case("JSON_VALUE") {
                                        // Unwrap: return just JSON_QUERY(...)
                                        return Ok(f.args[0].clone());
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized = Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena
                        | DialectType::Spark | DialectType::Databricks)
                {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL | DialectType::Redshift)
                    && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena)
                {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized = crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // BigQuery GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::Snowflake)
                {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Hand the fully-normalized AST to the target dialect for its own
                // transform pass, then render it (pretty or single-line).
                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty(&transformed)?
                } else {
                    target_dialect.generate(&transformed)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2024
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Only the first matching join per SELECT is rewritten; if the interval
    /// unit cannot be determined, the SELECT is returned unchanged.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Only SELECT nodes are candidates; everything else passes through.
            let Expression::Select(mut sel) = e else { return Ok(e); };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            // NOTE(review): the Simple-unit branch derives the unit name from the
                            // enum's Debug representation uppercased -- assumes variant names match
                            // SQL unit keywords (e.g. Month -> MONTH); confirm against the enum.
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            if matches!(upper.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY" | "HOUR" | "MINUTE" | "SECOND") {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Rewrite only the first matching join in this SELECT.
                if gda_info.is_some() { break; }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                return Ok(Expression::Select(sel));
            };
            // Safe: gda_join_idx is set on the same branch that sets gda_info.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            // The "+ 1 - 1" is kept (not simplified) to match the expected sqlglot output shape.
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let paren_inner = Expression::Paren(Box::new(Paren { this: minus_one, trailing_comments: vec![] }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![Expression::Literal(Literal::Number("0".to_string())), outer_plus_one],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The original UNNEST alias becomes the 5th FLATTEN output column (its VALUE slot).
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column { name: Identifier::new(&alias_name), table: None, join_mark: false, trailing_comments: vec![] }),
                        to: DataType::Int { length: None, integer_spelling: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel.expressions.iter().map(|expr| {
                Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr)
            }).collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2221
2222 /// Helper: replace column references to `alias_name` with dateadd expression
2223 fn replace_column_ref_with_dateadd(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2224 use crate::expressions::*;
2225 match expr {
2226 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2227 // Plain column reference -> DATEADD(...) AS alias_name
2228 Expression::Alias(Box::new(Alias {
2229 this: dateadd.clone(),
2230 alias: Identifier::new(alias_name),
2231 column_aliases: vec![],
2232 pre_alias_comments: vec![],
2233 trailing_comments: vec![],
2234 }))
2235 }
2236 Expression::Alias(a) => {
2237 // Check if the inner expression references the alias
2238 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2239 Expression::Alias(Box::new(Alias {
2240 this: new_this,
2241 alias: a.alias.clone(),
2242 column_aliases: a.column_aliases.clone(),
2243 pre_alias_comments: a.pre_alias_comments.clone(),
2244 trailing_comments: a.trailing_comments.clone(),
2245 }))
2246 }
2247 _ => expr.clone(),
2248 }
2249 }
2250
2251 /// Helper: replace column references in inner expression (not top-level)
2252 fn replace_column_ref_inner(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2253 use crate::expressions::*;
2254 match expr {
2255 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2256 dateadd.clone()
2257 }
2258 Expression::Add(op) => {
2259 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2260 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2261 Expression::Add(Box::new(BinaryOp {
2262 left, right,
2263 left_comments: op.left_comments.clone(),
2264 operator_comments: op.operator_comments.clone(),
2265 trailing_comments: op.trailing_comments.clone(),
2266 }))
2267 }
2268 Expression::Sub(op) => {
2269 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2270 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2271 Expression::Sub(Box::new(BinaryOp {
2272 left, right,
2273 left_comments: op.left_comments.clone(),
2274 operator_comments: op.operator_comments.clone(),
2275 trailing_comments: op.trailing_comments.clone(),
2276 }))
2277 }
2278 Expression::Mul(op) => {
2279 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2280 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2281 Expression::Mul(Box::new(BinaryOp {
2282 left, right,
2283 left_comments: op.left_comments.clone(),
2284 operator_comments: op.operator_comments.clone(),
2285 trailing_comments: op.trailing_comments.clone(),
2286 }))
2287 }
2288 _ => expr.clone(),
2289 }
2290 }
2291
2292 fn normalize_snowflake_pretty(mut sql: String) -> String {
2293 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
2294 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
2295 {
2296 sql = sql.replace(
2297 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
2298 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
2299 );
2300
2301 sql = sql.replace(
2302 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
2303 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
2304 );
2305
2306 sql = sql.replace(
2307 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
2308 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
2309 );
2310 }
2311
2312 sql
2313 }
2314
2315 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
2316 /// This handles cases where the same syntax has different semantics across dialects.
2317 fn cross_dialect_normalize(expr: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
2318 use crate::expressions::{AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc, Function, Identifier, IsNull, Literal, Null, Paren};
2319
2320 // Helper to tag which kind of transform to apply
2321 #[derive(Debug)]
2322 enum Action {
2323 None,
2324 GreatestLeastNull,
2325 ArrayGenerateRange,
2326 Div0TypedDivision,
2327 ArrayAggCollectList,
2328 ArrayAggWithinGroupFilter,
2329 ArrayAggFilter,
2330 CastTimestampToDatetime,
2331 DateTruncWrapCast,
2332 ToDateToCast,
2333 ConvertTimezoneToExpr,
2334 SetToVariable,
2335 RegexpReplaceSnowflakeToDuckDB,
2336 BigQueryFunctionNormalize,
2337 BigQuerySafeDivide,
2338 BigQueryCastType,
2339 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
2340 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
2341 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
2342 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
2343 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
2344 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
2345 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
2346 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
2347 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
2348 EpochConvert, // Expression::Epoch -> target-specific epoch function
2349 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
2350 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
2351 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
2352 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
2353 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
2354 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
2355 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
2356 TempTableHash, // TSQL #table -> temp table normalization
2357 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
2358 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
2359 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
2360 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
2361 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
2362 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
2363 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2364 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2365 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
2366 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
2367 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
2368 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
2369 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
2370 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
2371 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
2372 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
2373 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
2374 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
2375 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
2376 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
2377 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
2378 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
2379 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
2380 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
2381 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
2382 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
2383 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
2384 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
2385 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
2386 DollarParamConvert, // $foo -> @foo for BigQuery
2387 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
2388 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
2389 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
2390 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
2391 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
2392 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
2393 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
2394 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
2395 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
2396 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
2397 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
2398 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
2399 RespectNullsConvert, // RESPECT NULLS window function handling
2400 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
2401 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
2402 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
2403 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
2404 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
2405 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
2406 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
2407 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
2408 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
2409 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
2410 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
2411 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
2412 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
2413 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
2414 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
2415 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
2416 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
2417 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
2418 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
2419 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
2420 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
2421 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
2422 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
2423 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
2424 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
2425 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
2426 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
2427 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
2428 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
2429 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
2430 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
2431 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
2432 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
2433 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
2434 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
2435 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
2436 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
2437 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
2438 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
2439 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
2440 }
2441
2442 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
2443 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
2444 Self::transform_select_into(expr, source, target)
2445 } else {
2446 expr
2447 };
2448
2449 // Strip OFFSET ROWS for non-TSQL/Oracle targets
2450 let expr = if !matches!(target, DialectType::TSQL | DialectType::Oracle | DialectType::Fabric) {
2451 if let Expression::Select(mut select) = expr {
2452 if let Some(ref mut offset) = select.offset {
2453 offset.rows = None;
2454 }
2455 Expression::Select(select)
2456 } else {
2457 expr
2458 }
2459 } else {
2460 expr
2461 };
2462
        // Handle CreateTable WITH properties transformation before recursive transforms.
        // This is a pipeline of target-specific DDL fixups applied in order; each step
        // mutates `ct` in place, and the whole expression is rebuilt at the end.
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                // Hive keeps partition columns out of the main list, so only merge
                // when the target is not Hive.
                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties.retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names.iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties.push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names.iter()
                            .map(|n| Expression::Column(crate::expressions::Column {
                                name: crate::expressions::Identifier::new(n.clone()),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                            }))
                            .collect();
                        ct.properties.insert(0, Expression::PartitionedByProperty(Box::new(
                            crate::expressions::PartitionedByProperty {
                                this: Box::new(Expression::Tuple(Box::new(crate::expressions::Tuple { expressions: name_exprs }))),
                            }
                        )));
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }

            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                ct.properties.retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY constraints but strip TSQL-specific modifiers; remove all others
            if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                ct.constraints.retain(|c| matches!(c, crate::expressions::TableConstraint::PrimaryKey { .. }));
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey { columns, modifiers, .. } = constraint {
                        // Strip ASC/DESC from column names
                        // NOTE(review): this assumes the parser stores the sort
                        // direction as a literal " ASC"/" DESC" suffix on the
                        // column name string — confirm against the parser.
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int { integer_spelling, .. } = &mut col.data_type {
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some() && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
                // Local helper: replace any parameterized Array type with the bare
                // Custom "ARRAY" type so the generator emits just `ARRAY`.
                fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
                    if let crate::expressions::DataType::Array { .. } = dt {
                        *dt = crate::expressions::DataType::Custom { name: "ARRAY".to_string() };
                    }
                }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        // (the closure's `ct` shadows the CreateTable binding above;
                        // inside it `ct` is a &ConstraintType)
                        let has_explicit_not_null = col.constraint_order.iter().any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
2646
2647 // Handle CreateView column stripping for Presto/Trino target
2648 let expr = if let Expression::CreateView(mut cv) = expr {
2649 // Presto/Trino: drop column list when view has a SELECT body
2650 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty() {
2651 if !matches!(&cv.query, Expression::Null(_)) {
2652 cv.columns.clear();
2653 }
2654 }
2655 Expression::CreateView(cv)
2656 } else {
2657 expr
2658 };
2659
2660 transform_recursive(expr, &|e| {
            // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
            // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Cast(ref c) = e {
                    // Check if this is a CAST of an array to a struct array type
                    let is_struct_array_cast = matches!(&c.to, crate::expressions::DataType::Array { .. });
                    if is_struct_array_cast {
                        // A struct counts as "auto-named" when every field name is either
                        // absent or matches the `_<digits>` pattern the parser synthesizes.
                        // Both Array ([...]) and ArrayFunc (ARRAY(...)) literals are checked.
                        let has_auto_named_structs = match &c.this {
                            Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
                                    })
                                } else { false }
                            }),
                            Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
                                    })
                                } else { false }
                            }),
                            _ => false,
                        };
                        if has_auto_named_structs {
                            // Drop the synthetic field names: STRUCT{_0: x, _1: y} -> ROW(x, y).
                            let convert_struct_to_row = |elem: Expression| -> Expression {
                                if let Expression::Struct(s) = elem {
                                    let row_args: Vec<Expression> = s.fields.into_iter().map(|(_, v)| v).collect();
                                    Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)))
                                } else {
                                    elem
                                }
                            };
                            let mut c_clone = c.as_ref().clone();
                            match &mut c_clone.this {
                                Expression::Array(arr) => {
                                    arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
                                }
                                Expression::ArrayFunc(arr) => {
                                    arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
                                }
                                _ => {}
                            }
                            return Ok(Expression::Cast(Box::new(c_clone)));
                        }
                    }
                }
            }
2709
            // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
            // The SELECT's projection list becomes the struct's fields: aliased items
            // keep their alias as the key, plain columns keep the column name, and
            // anything else becomes an unnamed field.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Select(ref sel) = e {
                    if sel.kind.as_deref() == Some("STRUCT") {
                        let mut fields = Vec::new();
                        for expr in &sel.expressions {
                            match expr {
                                Expression::Alias(a) => {
                                    fields.push((Some(a.alias.name.clone()), a.this.clone()));
                                }
                                Expression::Column(c) => {
                                    fields.push((Some(c.name.name.clone()), expr.clone()));
                                }
                                _ => {
                                    fields.push((None, expr.clone()));
                                }
                            }
                        }
                        // Replace the projection with a single struct literal and clear
                        // the STRUCT kind so it doesn't re-trigger on later passes.
                        let struct_lit = Expression::Struct(Box::new(crate::expressions::Struct { fields }));
                        let mut new_select = sel.as_ref().clone();
                        new_select.kind = None;
                        new_select.expressions = vec![struct_lit];
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
2736
            // Convert @variable -> ${variable} for Spark/Hive/Databricks
            // Covers both representations the parser may produce for a TSQL variable:
            // a Parameter with At style, or a bare Column whose name starts with '@'.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
            {
                if let Expression::Parameter(ref p) = e {
                    if p.style == crate::expressions::ParameterStyle::At {
                        if let Some(ref name) = p.name {
                            return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
                                name: Some(name.clone()),
                                index: p.index,
                                style: crate::expressions::ParameterStyle::DollarBrace,
                                quoted: p.quoted,
                                expression: None,
                            })));
                        }
                    }
                }
                // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
                if let Expression::Column(ref col) = e {
                    if col.name.name.starts_with('@') && col.table.is_none() {
                        let var_name = col.name.name.trim_start_matches('@').to_string();
                        return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
                            name: Some(var_name),
                            index: None,
                            style: crate::expressions::ParameterStyle::DollarBrace,
                            quoted: false,
                            expression: None,
                        })));
                    }
                }
            }
2768
            // Convert @variable -> variable in SET statements for Spark/Databricks
            // The SET target may have been parsed three different ways (Parameter,
            // Identifier, or Column); all three are normalized to a plain Identifier
            // without the leading '@'. A new SetStatement is only built if something
            // actually changed.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                if let Expression::SetStatement(ref s) = e {
                    let mut new_items = s.items.clone();
                    let mut changed = false;
                    for item in &mut new_items {
                        // Strip @ from the SET name (Parameter style)
                        if let Expression::Parameter(ref p) = item.name {
                            if p.style == crate::expressions::ParameterStyle::At {
                                if let Some(ref name) = p.name {
                                    item.name = Expression::Identifier(Identifier::new(name));
                                    changed = true;
                                }
                            }
                        }
                        // Strip @ from the SET name (Identifier style - SET parser)
                        if let Expression::Identifier(ref id) = item.name {
                            if id.name.starts_with('@') {
                                let var_name = id.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                        // Strip @ from the SET name (Column style - alternative parsing)
                        if let Expression::Column(ref col) = item.name {
                            if col.name.name.starts_with('@') && col.table.is_none() {
                                let var_name = col.name.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        let mut new_set = (**s).clone();
                        new_set.items = new_items;
                        return Ok(Expression::SetStatement(Box::new(new_set)));
                    }
                }
            }
2810
2811 // Strip NOLOCK hint for non-TSQL targets
2812 if matches!(source, DialectType::TSQL | DialectType::Fabric)
2813 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
2814 {
2815 if let Expression::Table(ref tr) = e {
2816 if !tr.hints.is_empty() {
2817 let mut new_tr = tr.clone();
2818 new_tr.hints.clear();
2819 return Ok(Expression::Table(new_tr));
2820 }
2821 }
2822 }
2823
2824 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
2825 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
2826 if matches!(target, DialectType::Snowflake) {
2827 if let Expression::IsTrue(ref itf) = e {
2828 if let Expression::Boolean(ref b) = itf.this {
2829 if !itf.not {
2830 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
2831 } else {
2832 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
2833 }
2834 }
2835 }
2836 if let Expression::IsFalse(ref itf) = e {
2837 if let Expression::Boolean(ref b) = itf.this {
2838 if !itf.not {
2839 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
2840 } else {
2841 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
2842 }
2843 }
2844 }
2845 }
2846
            // BigQuery: split dotted backtick identifiers in table names
            // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
            // Applies to both the CREATE TABLE name and its CLONE source; 3 parts map to
            // catalog.schema.name, 2 parts to schema.name, anything else is left alone.
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::CreateTable(ref ct) = e {
                    let mut changed = false;
                    let mut new_ct = ct.clone();
                    // Split the table name (only when no schema was parsed separately,
                    // i.e. the whole dotted path sits in a single identifier)
                    if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
                        let parts: Vec<&str> = ct.name.name.name.split('.').collect();
                        // Use quoted identifiers when the original was quoted (backtick in BigQuery)
                        let was_quoted = ct.name.name.quoted;
                        let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
                        if parts.len() == 3 {
                            new_ct.name.catalog = Some(mk_id(parts[0]));
                            new_ct.name.schema = Some(mk_id(parts[1]));
                            new_ct.name.name = mk_id(parts[2]);
                            changed = true;
                        } else if parts.len() == 2 {
                            new_ct.name.schema = Some(mk_id(parts[0]));
                            new_ct.name.name = mk_id(parts[1]);
                            changed = true;
                        }
                    }
                    // Split the clone source name
                    if let Some(ref clone_src) = ct.clone_source {
                        if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                            let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                            let was_quoted = clone_src.name.quoted;
                            let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
                            let mut new_src = clone_src.clone();
                            if parts.len() == 3 {
                                new_src.catalog = Some(mk_id(parts[0]));
                                new_src.schema = Some(mk_id(parts[1]));
                                new_src.name = mk_id(parts[2]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            } else if parts.len() == 2 {
                                new_src.schema = Some(mk_id(parts[0]));
                                new_src.name = mk_id(parts[1]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        return Ok(Expression::CreateTable(new_ct));
                    }
                }
            }
2896
            // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
            // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
            // OFFSET/SAFE_OFFSET are 0-based (index shifted by +1); ORDINAL/SAFE_ORDINAL
            // are already 1-based (passed through). Non-literal 0-based indices become
            // `idx + 1` expressions instead of folded constants.
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena)
            {
                if let Expression::Subscript(ref sub) = e {
                    let (new_index, is_safe) = match &sub.index {
                        // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
                        // Non-integer literals fall through untouched (None).
                        Expression::Literal(Literal::Number(n)) => {
                            if let Ok(val) = n.parse::<i64>() {
                                (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
                            } else {
                                (None, false)
                            }
                        }
                        // OFFSET(n) -> n+1 (0-based)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 => {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
                                } else {
                                    (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
                                }
                            } else {
                                (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
                            }
                        }
                        // ORDINAL(n) -> n (already 1-based)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 => {
                            (Some(f.args[0].clone()), false)
                        }
                        // SAFE_OFFSET(n) -> n+1 (0-based, safe)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 => {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), true)
                                } else {
                                    (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
                                }
                            } else {
                                (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
                            }
                        }
                        // SAFE_ORDINAL(n) -> n (already 1-based, safe)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 => {
                            (Some(f.args[0].clone()), true)
                        }
                        _ => (None, false),
                    };
                    if let Some(idx) = new_index {
                        if is_safe && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                            // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ELEMENT_AT".to_string(), vec![sub.this.clone(), idx],
                            ))));
                        } else {
                            // DuckDB or non-safe: just use subscript with converted index
                            return Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
                                this: sub.this.clone(),
                                index: idx,
                            })));
                        }
                    }
                }
            }
2962
            // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
            // BigQuery's LENGTH counts bytes for BYTES and characters for STRING; the
            // runtime TYPEOF dispatch reproduces that split in DuckDB.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Length(ref uf) = e {
                    let arg = uf.this.clone();
                    let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
                    // BLOB branch: OCTET_LENGTH(CAST(x AS VARBINARY)) -> byte count
                    let blob_cast = Expression::Cast(Box::new(Cast {
                        this: arg.clone(),
                        to: DataType::VarBinary { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
                    // ELSE branch: LENGTH(CAST(x AS TEXT)) -> character count
                    let text_cast = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Text,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc { this: text_cast, original_name: None }));
                    return Ok(Expression::Case(Box::new(Case {
                        operand: Some(typeof_func),
                        whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
                        else_: Some(length_text),
                    })));
                }
            }
2993
2994 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
2995 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
2996 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
2997 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
2998 if let Expression::Alias(ref a) = e {
2999 if matches!(&a.this, Expression::Unnest(_)) {
3000 if a.column_aliases.is_empty() {
3001 // Drop the entire alias, return just the UNNEST expression
3002 return Ok(a.this.clone());
3003 } else {
3004 // Use first column alias as the main alias
3005 let mut new_alias = a.as_ref().clone();
3006 new_alias.alias = a.column_aliases[0].clone();
3007 new_alias.column_aliases.clear();
3008 return Ok(Expression::Alias(Box::new(new_alias)));
3009 }
3010 }
3011 }
3012 }
3013
            // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
            // The array payload stored on In.unnest is lifted into a one-column subquery;
            // the rebuilt In clears `unnest` so the generator emits a normal IN (SELECT ...).
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::In(ref in_expr) = e {
                    if let Some(ref unnest_inner) = in_expr.unnest {
                        // Build the function call for the target dialect
                        let func_expr = if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            // Use EXPLODE for Hive/Spark
                            Expression::Function(Box::new(Function::new("EXPLODE".to_string(), vec![*unnest_inner.clone()])))
                        } else {
                            // Use UNNEST for Presto/Trino/DuckDB/etc.
                            Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                this: *unnest_inner.clone(),
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            }))
                        };

                        // Wrap in SELECT
                        let mut inner_select = crate::expressions::Select::new();
                        inner_select.expressions = vec![func_expr];

                        let subquery_expr = Expression::Select(Box::new(inner_select));

                        return Ok(Expression::In(Box::new(crate::expressions::In {
                            this: in_expr.this.clone(),
                            expressions: Vec::new(),
                            query: Some(subquery_expr),
                            not: in_expr.not,
                            global: in_expr.global,
                            unnest: None,
                        })));
                    }
                }
            }
3050
            // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
            // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context.
            // SQLite's generate_series table-valued function exposes its output as a
            // column named "value", so the column alias is applied via an inner SELECT.
            if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
                if let Expression::Alias(ref a) = e {
                    if let Expression::Function(ref f) = a.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && !a.column_aliases.is_empty() {
                            // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                            let col_alias = a.column_aliases[0].clone();
                            let mut inner_select = crate::expressions::Select::new();
                            inner_select.expressions = vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                                Expression::Identifier(Identifier::new("value".to_string())),
                                col_alias,
                            )))];
                            inner_select.from = Some(crate::expressions::From {
                                expressions: vec![a.this.clone()],
                            });
                            let subquery = Expression::Subquery(Box::new(crate::expressions::Subquery {
                                this: Expression::Select(Box::new(inner_select)),
                                alias: Some(a.alias.clone()),
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                            }));
                            return Ok(subquery);
                        }
                    }
                }
            }
3086
            // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
            // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
            // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
            // Detection: a second FROM entry whose schema (or first dotted-name part)
            // matches the first FROM table is treated as an array path into that table.
            if matches!(source, DialectType::BigQuery) {
                if let Expression::Select(ref s) = e {
                    if let Some(ref from) = s.from {
                        if from.expressions.len() >= 2 {
                            // Collect table names from first expression
                            let first_tables: Vec<String> = from.expressions.iter().take(1).filter_map(|expr| {
                                if let Expression::Table(t) = expr {
                                    Some(t.name.name.to_lowercase())
                                } else {
                                    None
                                }
                            }).collect();

                            // Check if any subsequent FROM expressions are schema-qualified with a matching table name
                            // or have a dotted name matching a table
                            let mut needs_rewrite = false;
                            for expr in from.expressions.iter().skip(1) {
                                if let Expression::Table(t) = expr {
                                    if let Some(ref schema) = t.schema {
                                        if first_tables.contains(&schema.name.to_lowercase()) {
                                            needs_rewrite = true;
                                            break;
                                        }
                                    }
                                    // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
                                    if t.schema.is_none() && t.name.name.contains('.') {
                                        let parts: Vec<&str> = t.name.name.split('.').collect();
                                        if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
                                            needs_rewrite = true;
                                            break;
                                        }
                                    }
                                }
                            }

                            // Rebuild the FROM list: matching entries become CROSS JOINs,
                            // everything else is kept as a plain FROM expression.
                            if needs_rewrite {
                                let mut new_select = s.clone();
                                let mut new_from_exprs = vec![from.expressions[0].clone()];
                                let mut new_joins = s.joins.clone();

                                for expr in from.expressions.iter().skip(1) {
                                    if let Expression::Table(ref t) = expr {
                                        if let Some(ref schema) = t.schema {
                                            if first_tables.contains(&schema.name.to_lowercase()) {
                                                // This is an array path reference, convert to CROSS JOIN UNNEST
                                                let col_expr = Expression::Column(crate::expressions::Column {
                                                    name: t.name.clone(),
                                                    table: Some(schema.clone()),
                                                    join_mark: false,
                                                    trailing_comments: vec![],
                                                });
                                                let unnest_expr = Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                                    this: col_expr,
                                                    expressions: Vec::new(),
                                                    with_ordinality: false,
                                                    alias: None,
                                                    offset_alias: None,
                                                }));
                                                let join_this = if let Some(ref alias) = t.alias {
                                                    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                                                        // Presto: UNNEST(x) AS _t0(results)
                                                        Expression::Alias(Box::new(crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: Identifier::new("_t0"),
                                                            column_aliases: vec![alias.clone()],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                        }))
                                                    } else {
                                                        // BigQuery: UNNEST(x) AS results
                                                        Expression::Alias(Box::new(crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: alias.clone(),
                                                            column_aliases: vec![],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                        }))
                                                    }
                                                } else {
                                                    unnest_expr
                                                };
                                                new_joins.push(crate::expressions::Join {
                                                    kind: crate::expressions::JoinKind::Cross,
                                                    this: join_this,
                                                    on: None,
                                                    using: Vec::new(),
                                                    use_inner_keyword: false,
                                                    use_outer_keyword: false,
                                                    deferred_condition: false,
                                                    join_hint: None,
                                                    match_condition: None,
                                                    pivots: Vec::new(),
                                                });
                                            } else {
                                                new_from_exprs.push(expr.clone());
                                            }
                                        } else if t.schema.is_none() && t.name.name.contains('.') {
                                            // Dotted name in quoted identifier: `Coordinates.position`
                                            let parts: Vec<&str> = t.name.name.split('.').collect();
                                            if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
                                                let join_this = if matches!(target, DialectType::BigQuery) {
                                                    // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
                                                    Expression::Table(t.clone())
                                                } else {
                                                    // Other targets: split into "schema"."name"
                                                    let mut new_t = t.clone();
                                                    new_t.schema = Some(Identifier::quoted(parts[0]));
                                                    new_t.name = Identifier::quoted(parts[1]);
                                                    Expression::Table(new_t)
                                                };
                                                new_joins.push(crate::expressions::Join {
                                                    kind: crate::expressions::JoinKind::Cross,
                                                    this: join_this,
                                                    on: None,
                                                    using: Vec::new(),
                                                    use_inner_keyword: false,
                                                    use_outer_keyword: false,
                                                    deferred_condition: false,
                                                    join_hint: None,
                                                    match_condition: None,
                                                    pivots: Vec::new(),
                                                });
                                            } else {
                                                new_from_exprs.push(expr.clone());
                                            }
                                        } else {
                                            new_from_exprs.push(expr.clone());
                                        }
                                    } else {
                                        new_from_exprs.push(expr.clone());
                                    }
                                }

                                new_select.from = Some(crate::expressions::From {
                                    expressions: new_from_exprs,
                                    ..from.clone()
                                });
                                new_select.joins = new_joins;
                                return Ok(Expression::Select(new_select));
                            }
                        }
                    }
                }
            }
3234
        // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
        //
        // Hive-family dialects express row-expanding joins with LATERAL VIEW
        // EXPLODE(...) rather than CROSS JOIN UNNEST(...). Each qualifying CROSS
        // JOIN is removed from `joins` and appended to `lateral_views` instead;
        // all other joins are kept in their original relative order.
        if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
            if let Expression::Select(ref s) = e {
                // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
                // (the operand may appear bare or wrapped in an Alias).
                let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
                    matches!(expr, Expression::Unnest(_))
                        || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
                };
                let has_unnest_join = s.joins.iter().any(|j| {
                    j.kind == crate::expressions::JoinKind::Cross && (
                        matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
                        || is_unnest_or_explode_expr(&j.this)
                    )
                });
                if has_unnest_join {
                    let mut select = s.clone();
                    let mut new_joins = Vec::new();
                    // Drain the join list and re-partition it: convertible CROSS
                    // JOINs become lateral views, everything else is re-collected.
                    for join in select.joins.drain(..) {
                        if join.kind == crate::expressions::JoinKind::Cross {
                            // Extract the UNNEST/EXPLODE from the join.
                            // `func_expr == None` marks a CROSS JOIN that is NOT
                            // an unnest and must be kept as a plain join.
                            let (func_expr, table_alias, col_aliases) = match &join.this {
                                Expression::Alias(a) => {
                                    // An empty alias identifier means "no table alias".
                                    let ta = if a.alias.is_empty() { None } else { Some(a.alias.clone()) };
                                    let cas = a.column_aliases.clone();
                                    match &a.this {
                                        Expression::Unnest(u) => {
                                            // Convert UNNEST(x) to EXPLODE(x)
                                            let explode = Expression::Function(Box::new(crate::expressions::Function::new(
                                                "EXPLODE".to_string(),
                                                vec![u.this.clone()],
                                            )));
                                            (Some(explode), ta, cas)
                                        }
                                        Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE") => {
                                            // Already EXPLODE — keep the call as-is.
                                            (Some(Expression::Function(f.clone())), ta, cas)
                                        }
                                        _ => (None, None, Vec::new())
                                    }
                                }
                                Expression::Unnest(u) => {
                                    // Bare UNNEST without an Alias wrapper: the table
                                    // alias, if any, lives on the Unnest node itself.
                                    let explode = Expression::Function(Box::new(crate::expressions::Function::new(
                                        "EXPLODE".to_string(),
                                        vec![u.this.clone()],
                                    )));
                                    let ta = u.alias.clone();
                                    (Some(explode), ta, Vec::new())
                                }
                                _ => (None, None, Vec::new())
                            };
                            if let Some(func) = func_expr {
                                // `outer: false` — emits LATERAL VIEW, not LATERAL VIEW OUTER.
                                select.lateral_views.push(crate::expressions::LateralView {
                                    this: func,
                                    table_alias,
                                    column_aliases: col_aliases,
                                    outer: false,
                                });
                            } else {
                                new_joins.push(join);
                            }
                        } else {
                            new_joins.push(join);
                        }
                    }
                    select.joins = new_joins;
                    return Ok(Expression::Select(select));
                }
            }
        }
3303
        // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
        // for BigQuery, Presto/Trino, Snowflake
        //
        // Detection only happens here; the actual rewrite is delegated to
        // `Self::rewrite_unnest_expansion`, which may decline (return None),
        // in which case the expression passes through unchanged.
        if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
            && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino | DialectType::Snowflake)
        {
            if let Expression::Select(ref s) = e {
                // Check if any SELECT expressions contain UNNEST
                // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
                let has_unnest_in_select = s.expressions.iter().any(|expr| {
                    // Recursive scan. It descends only through Alias and the four
                    // arithmetic binary ops; an UNNEST nested deeper (e.g. inside
                    // an arbitrary function call) is NOT detected here.
                    fn contains_unnest(e: &Expression) -> bool {
                        match e {
                            Expression::Unnest(_) => true,
                            Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => true,
                            Expression::Alias(a) => contains_unnest(&a.this),
                            Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
                                contains_unnest(&op.left) || contains_unnest(&op.right)
                            }
                            _ => false,
                        }
                    }
                    contains_unnest(expr)
                });

                if has_unnest_in_select {
                    let rewritten = Self::rewrite_unnest_expansion(s, target);
                    if let Some(new_select) = rewritten {
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
        }
3335
3336 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
3337 // BigQuery '\n' -> PostgreSQL literal newline in string
3338 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL) {
3339 if let Expression::Literal(Literal::String(ref s)) = e {
3340 if s.contains("\\n") || s.contains("\\t") || s.contains("\\r") || s.contains("\\\\") {
3341 let converted = s
3342 .replace("\\n", "\n")
3343 .replace("\\t", "\t")
3344 .replace("\\r", "\r")
3345 .replace("\\\\", "\\");
3346 return Ok(Expression::Literal(Literal::String(converted)));
3347 }
3348 }
3349 }
3350
        // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
        // when source != target (identity tests keep the Literal::Timestamp for native handling)
        //
        // MySQL is handled first and returns early because it may produce a
        // TIMESTAMP() function call rather than a CAST; every other target maps
        // to CAST('<literal>' AS <dt>) with a per-target data type chosen below.
        if source != target {
            if let Expression::Literal(Literal::Timestamp(ref s)) = e {
                let s = s.clone();
                // MySQL: TIMESTAMP handling depends on source dialect
                // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
                // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
                if matches!(target, DialectType::MySQL) {
                    if matches!(source, DialectType::BigQuery) {
                        // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
                        return Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(), vec![Expression::Literal(Literal::String(s))],
                        ))));
                    } else {
                        // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
                        return Ok(Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(s)),
                            to: DataType::Custom { name: "DATETIME".to_string() },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })));
                    }
                }
                // Target data type for the CAST. Custom names are emitted verbatim
                // by the generator; DataType::Timestamp renders as the target's
                // plain TIMESTAMP spelling.
                let dt = match target {
                    DialectType::BigQuery | DialectType::StarRocks => {
                        DataType::Custom { name: "DATETIME".to_string() }
                    }
                    DialectType::Snowflake => {
                        // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
                        if matches!(source, DialectType::BigQuery) {
                            DataType::Custom { name: "TIMESTAMPTZ".to_string() }
                        } else if matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake) {
                            DataType::Timestamp { precision: None, timezone: false }
                        } else {
                            // Everything else is treated as timezone-naive.
                            DataType::Custom { name: "TIMESTAMPNTZ".to_string() }
                        }
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
                        if matches!(source, DialectType::BigQuery) {
                            DataType::Timestamp { precision: None, timezone: false }
                        } else {
                            DataType::Custom { name: "TIMESTAMP_NTZ".to_string() }
                        }
                    }
                    DialectType::ClickHouse => {
                        DataType::Custom { name: "Nullable(DateTime)".to_string() }
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        DataType::Custom { name: "DATETIME2".to_string() }
                    }
                    DialectType::DuckDB => {
                        // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
                        // or when the timestamp string explicitly has timezone info
                        if matches!(source, DialectType::BigQuery) || Self::timestamp_string_has_timezone(&s) {
                            DataType::Custom { name: "TIMESTAMPTZ".to_string() }
                        } else {
                            DataType::Timestamp { precision: None, timezone: false }
                        }
                    }
                    _ => {
                        DataType::Timestamp { precision: None, timezone: false }
                    }
                };
                return Ok(Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: dt,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })));
            }
        }
3428
3429 // PostgreSQL DELETE requires explicit AS for table aliases
3430 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
3431 if let Expression::Delete(ref del) = e {
3432 if del.alias.is_some() && !del.alias_explicit_as {
3433 let mut new_del = del.clone();
3434 new_del.alias_explicit_as = true;
3435 return Ok(Expression::Delete(new_del));
3436 }
3437 }
3438 }
3439
3440 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
3441 if matches!(target, DialectType::DuckDB) {
3442 if let Expression::CreateDatabase(db) = e {
3443 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
3444 schema.if_not_exists = db.if_not_exists;
3445 return Ok(Expression::CreateSchema(Box::new(schema)));
3446 }
3447 if let Expression::DropDatabase(db) = e {
3448 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
3449 schema.if_exists = db.if_exists;
3450 return Ok(Expression::DropSchema(Box::new(schema)));
3451 }
3452 }
3453
3454 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
3455 if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) {
3456 if let Expression::Cast(ref c) = e {
3457 if let DataType::Custom { ref name } = c.to {
3458 let upper = name.to_uppercase();
3459 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
3460 let inner = &name[9..name.len()-1]; // strip "Nullable(" and ")"
3461 let inner_upper = inner.to_uppercase();
3462 let new_dt = match inner_upper.as_str() {
3463 "DATETIME" | "DATETIME64" => DataType::Timestamp { precision: None, timezone: false },
3464 "DATE" => DataType::Date,
3465 "INT64" | "BIGINT" => DataType::BigInt { length: None },
3466 "INT32" | "INT" | "INTEGER" => DataType::Int { length: None, integer_spelling: false },
3467 "FLOAT64" | "DOUBLE" => DataType::Double { precision: None, scale: None },
3468 "STRING" => DataType::Text,
3469 _ => DataType::Custom { name: inner.to_string() },
3470 };
3471 let mut new_cast = c.clone();
3472 new_cast.to = new_dt;
3473 return Ok(Expression::Cast(new_cast));
3474 }
3475 }
3476 }
3477 }
3478
3479 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
3480 if matches!(target, DialectType::Snowflake) {
3481 if let Expression::ArrayConcatAgg(ref agg) = e {
3482 let mut agg_clone = agg.as_ref().clone();
3483 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
3484 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
3485 let flatten = Expression::Function(Box::new(Function::new(
3486 "ARRAY_FLATTEN".to_string(), vec![array_agg],
3487 )));
3488 return Ok(flatten);
3489 }
3490 }
3491
3492 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
3493 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
3494 if let Expression::ArrayConcatAgg(agg) = e {
3495 let arg = agg.this;
3496 return Ok(Expression::Function(Box::new(Function::new(
3497 "ARRAY_CONCAT_AGG".to_string(), vec![arg],
3498 ))));
3499 }
3500 }
3501
3502 // Determine what action to take by inspecting e immutably
3503 let action = {
3504 let source_propagates_nulls = matches!(source, DialectType::Snowflake | DialectType::BigQuery);
3505 let target_ignores_nulls = matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
3506
3507 match &e {
3508 Expression::Function(f) => {
3509 let name = f.name.to_uppercase();
3510 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
3511 if (name == "DATE_PART" || name == "DATEPART")
3512 && f.args.len() == 2
3513 && matches!(target, DialectType::Snowflake)
3514 && !matches!(source, DialectType::Snowflake)
3515 && matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_)))
3516 {
3517 Action::DatePartUnquote
3518 } else if source_propagates_nulls && target_ignores_nulls
3519 && (name == "GREATEST" || name == "LEAST") && f.args.len() >= 2 {
3520 Action::GreatestLeastNull
3521 } else if matches!(source, DialectType::Snowflake)
3522 && name == "ARRAY_GENERATE_RANGE" && f.args.len() >= 2 {
3523 Action::ArrayGenerateRange
3524 } else if matches!(source, DialectType::Snowflake)
3525 && matches!(target, DialectType::DuckDB)
3526 && name == "DATE_TRUNC" && f.args.len() == 2 {
3527 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
3528 // Logic based on Python sqlglot's input_type_preserved flag:
3529 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
3530 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
3531 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
3532 let unit_str = match &f.args[0] {
3533 Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.to_uppercase()),
3534 _ => None,
3535 };
3536 let is_date_unit = unit_str.as_ref().map_or(false, |u| matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY"));
3537 match &f.args[1] {
3538 Expression::Cast(c) => match &c.to {
3539 DataType::Time { .. } => Action::DateTruncWrapCast,
3540 DataType::Custom { name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") || name.eq_ignore_ascii_case("TIMESTAMPLTZ") => Action::DateTruncWrapCast,
3541 DataType::Timestamp { timezone: true, .. } => Action::DateTruncWrapCast,
3542 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
3543 DataType::Timestamp { timezone: false, .. } if is_date_unit => Action::DateTruncWrapCast,
3544 _ => Action::None,
3545 }
3546 _ => Action::None,
3547 }
3548 } else if matches!(source, DialectType::Snowflake)
3549 && matches!(target, DialectType::DuckDB)
3550 && name == "TO_DATE" && f.args.len() == 1
3551 && !matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_))) {
3552 Action::ToDateToCast
3553 } else if !matches!(source, DialectType::Redshift)
3554 && matches!(target, DialectType::Redshift)
3555 && name == "CONVERT_TIMEZONE"
3556 && (f.args.len() == 2 || f.args.len() == 3) {
3557 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
3558 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
3559 // The Redshift parser adds 'UTC' as default source_tz, but when
3560 // transpiling from other dialects, we should preserve the original form.
3561 Action::ConvertTimezoneToExpr
3562 } else if matches!(source, DialectType::Snowflake)
3563 && matches!(target, DialectType::DuckDB)
3564 && name == "REGEXP_REPLACE"
3565 && f.args.len() == 4
3566 && !matches!(&f.args[3], Expression::Literal(crate::expressions::Literal::String(_))) {
3567 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
3568 Action::RegexpReplaceSnowflakeToDuckDB
3569 } else if name == "_BQ_TO_HEX" {
3570 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
3571 Action::BigQueryToHexBare
3572 } else if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
3573 // BigQuery-specific functions that need to be converted to standard forms
3574 match name.as_str() {
3575 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3576 | "DATE_DIFF"
3577 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
3578 | "DATETIME_ADD" | "DATETIME_SUB"
3579 | "TIME_ADD" | "TIME_SUB"
3580 | "DATE_ADD" | "DATE_SUB"
3581 | "SAFE_DIVIDE"
3582 | "GENERATE_UUID"
3583 | "COUNTIF"
3584 | "EDIT_DISTANCE"
3585 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
3586 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
3587 | "TO_HEX"
3588 | "TO_JSON_STRING"
3589 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
3590 | "DIV"
3591 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
3592 | "LAST_DAY"
3593 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
3594 | "REGEXP_CONTAINS"
3595 | "CONTAINS_SUBSTR"
3596 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
3597 | "SAFE_CAST"
3598 | "GENERATE_DATE_ARRAY"
3599 | "PARSE_DATE" | "PARSE_TIMESTAMP"
3600 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
3601 | "ARRAY_CONCAT"
3602 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
3603 | "INSTR"
3604 | "MD5" | "SHA1" | "SHA256" | "SHA512"
3605 | "GENERATE_UUID()" // just in case
3606 | "REGEXP_EXTRACT_ALL"
3607 | "REGEXP_EXTRACT"
3608 | "INT64"
3609 | "ARRAY_CONCAT_AGG"
3610 | "DATE_DIFF(" // just in case
3611 | "TO_HEX_MD5" // internal
3612 | "MOD"
3613 | "CONCAT"
3614 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
3615 | "STRUCT"
3616 | "ROUND"
3617 | "MAKE_INTERVAL"
3618 | "ARRAY_TO_STRING"
3619 | "PERCENTILE_CONT"
3620 => Action::BigQueryFunctionNormalize,
3621 "ARRAY" if matches!(target, DialectType::Snowflake)
3622 && f.args.len() == 1
3623 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
3624 => Action::BigQueryArraySelectAsStructToSnowflake,
3625 _ => Action::None,
3626 }
3627 } else if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::BigQuery) {
3628 // BigQuery -> BigQuery normalizations
3629 match name.as_str() {
3630 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3631 | "DATE_DIFF"
3632 | "DATE_ADD"
3633 | "TO_HEX"
3634 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_TIME" | "CURRENT_DATETIME"
3635 | "GENERATE_DATE_ARRAY"
3636 | "INSTR"
3637 | "FORMAT_DATETIME"
3638 | "DATETIME"
3639 | "MAKE_INTERVAL"
3640 => Action::BigQueryFunctionNormalize,
3641 _ => Action::None,
3642 }
3643 } else {
3644 // Generic function normalization for non-BigQuery sources
3645 match name.as_str() {
3646 "ARBITRARY" | "AGGREGATE"
3647 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
3648 | "STRUCT_EXTRACT"
3649 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
3650 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
3651 | "SUBSTRINGINDEX"
3652 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
3653 | "UNICODE"
3654 | "XOR"
3655 | "ARRAY_REVERSE_SORT"
3656 | "ENCODE" | "DECODE"
3657 | "QUANTILE"
3658 | "EPOCH" | "EPOCH_MS"
3659 | "HASHBYTES"
3660 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
3661 | "APPROX_DISTINCT"
3662 | "DATE_PARSE" | "FORMAT_DATETIME"
3663 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
3664 | "RLIKE"
3665 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
3666 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
3667 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
3668 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
3669 | "MAP" | "MAP_FROM_ENTRIES"
3670 | "COLLECT_LIST" | "COLLECT_SET"
3671 | "ISNAN" | "IS_NAN"
3672 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
3673 | "FORMAT_NUMBER"
3674 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
3675 | "ELEMENT_AT"
3676 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
3677 | "SPLIT_PART"
3678 // GENERATE_SERIES: handled separately below
3679 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
3680 | "JSON_QUERY" | "JSON_VALUE"
3681 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
3682 | "ARRAY_SUM"
3683 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
3684 | "CURDATE" | "CURTIME"
3685 | "ARRAY_TO_STRING"
3686 | "ARRAY_SORT" | "SORT_ARRAY"
3687 | "LEFT" | "RIGHT"
3688 | "MAP_FROM_ARRAYS"
3689 | "LIKE" | "ILIKE"
3690 | "ARRAY_CONCAT"
3691 | "QUANTILE_CONT" | "QUANTILE_DISC"
3692 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
3693 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
3694 | "LOCATE" | "STRPOS" | "INSTR"
3695 | "CHAR"
3696 // CONCAT: handled separately for COALESCE wrapping
3697 | "ARRAY_JOIN"
3698 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
3699 | "ISNULL"
3700 | "MONTHNAME"
3701 | "TO_TIMESTAMP"
3702 | "TO_DATE"
3703 | "TO_JSON"
3704 | "STR_TO_DATE"
3705 | "REGEXP_SPLIT"
3706 | "SPLIT"
3707 | "FORMATDATETIME"
3708 | "ARRAYJOIN"
3709 | "SPLITBYSTRING" | "SPLITBYREGEXP"
3710 | "NVL"
3711 | "TO_CHAR"
3712 | "DBMS_RANDOM.VALUE"
3713 | "REGEXP_LIKE"
3714 | "REPLICATE"
3715 | "LEN"
3716 | "COUNT_BIG"
3717 | "DATEFROMPARTS"
3718 | "DATETIMEFROMPARTS"
3719 | "CONVERT" | "TRY_CONVERT"
3720 | "STRFTIME" | "STRPTIME"
3721 | "DATE_FORMAT" | "FORMAT_DATE"
3722 | "PARSE_TIMESTAMP" | "PARSE_DATE"
3723 | "FROM_BASE64" | "TO_BASE64"
3724 | "GETDATE"
3725 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
3726 | "TO_UTF8" | "FROM_UTF8"
3727 | "STARTS_WITH" | "STARTSWITH"
3728 | "APPROX_COUNT_DISTINCT"
3729 | "JSON_FORMAT"
3730 | "SYSDATE"
3731 | "LOGICAL_OR" | "LOGICAL_AND"
3732 | "MONTHS_ADD"
3733 | "SCHEMA_NAME"
3734 | "STRTOL"
3735 | "EDITDIST3"
3736 | "FORMAT"
3737 | "LIST_CONTAINS" | "LIST_HAS"
3738 | "VARIANCE" | "STDDEV"
3739 | "ISINF"
3740 | "TO_UNIXTIME"
3741 | "FROM_UNIXTIME"
3742 | "DATEPART" | "DATE_PART"
3743 | "DATENAME"
3744 | "STRING_AGG"
3745 | "JSON_ARRAYAGG"
3746 | "APPROX_QUANTILE"
3747 | "MAKE_DATE"
3748 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
3749 | "RANGE"
3750 | "TRY_ELEMENT_AT"
3751 | "STR_TO_MAP"
3752 | "STRING"
3753 | "TIME_TO_STR"
3754 => Action::GenericFunctionNormalize,
3755 // Functions needing specific cross-dialect transforms
3756 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3757 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
3758 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
3759 "ARRAY" if matches!(source, DialectType::BigQuery)
3760 && matches!(target, DialectType::Snowflake)
3761 && f.args.len() == 1
3762 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
3763 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
3764 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
3765 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
3766 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
3767 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3768 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
3769 // GENERATE_SERIES with interval normalization for PG target
3770 "GENERATE_SERIES" if f.args.len() >= 3
3771 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3772 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
3773 "GENERATE_SERIES" => Action::None, // passthrough for other cases
3774 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
3775 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3776 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
3777 "CONCAT" => Action::GenericFunctionNormalize,
3778 // DIV(a, b) -> target-specific integer division
3779 "DIV" if f.args.len() == 2
3780 && matches!(source, DialectType::PostgreSQL)
3781 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
3782 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3783 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
3784 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3785 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3786 "JSONB_EXISTS" if f.args.len() == 2
3787 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
3788 // DATE_BIN -> TIME_BUCKET for DuckDB
3789 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
3790 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
3791 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
3792 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
3793 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
3794 // ClickHouse any -> ANY_VALUE for other dialects
3795 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
3796 _ => Action::None,
3797 }
3798 }
3799 }
3800 Expression::AggregateFunction(af) => {
3801 let name = af.name.to_uppercase();
3802 match name.as_str() {
3803 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
3804 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
3805 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3806 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3807 "ARRAY_AGG" if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => Action::ArrayAggToCollectList,
3808 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3809 "COLLECT_LIST" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB) => Action::CollectListToArrayAgg,
3810 "COLLECT_SET" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake | DialectType::DuckDB) => Action::CollectSetConvert,
3811 "PERCENTILE" if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) => Action::PercentileConvert,
3812 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
3813 "CORR" if matches!(target, DialectType::DuckDB) && matches!(source, DialectType::Snowflake) => Action::CorrIsnanWrap,
3814 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3815 "APPROX_QUANTILES" if matches!(source, DialectType::BigQuery)
3816 && matches!(target, DialectType::DuckDB) => Action::BigQueryApproxQuantiles,
3817 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3818 "PERCENTILE_CONT" if matches!(source, DialectType::BigQuery)
3819 && matches!(target, DialectType::DuckDB)
3820 && af.args.len() >= 2 => Action::BigQueryPercentileContToDuckDB,
3821 _ => Action::None,
3822 }
3823 }
3824 Expression::JSONArrayAgg(_) => {
3825 match target {
3826 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
3827 _ => Action::None,
3828 }
3829 }
3830 Expression::ToNumber(tn) => {
3831 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
3832 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
3833 match target {
3834 DialectType::Oracle | DialectType::Snowflake | DialectType::Teradata => Action::None,
3835 _ => Action::GenericFunctionNormalize,
3836 }
3837 } else {
3838 Action::None
3839 }
3840 }
3841 Expression::IfFunc(if_func) => {
3842 if matches!(source, DialectType::Snowflake)
3843 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::SQLite)
3844 && matches!(if_func.false_value, Some(Expression::Div(_))) {
3845 Action::Div0TypedDivision
3846 } else {
3847 Action::None
3848 }
3849 }
3850 Expression::ToJson(_) => {
3851 match target {
3852 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
3853 DialectType::BigQuery => Action::ToJsonConvert,
3854 DialectType::DuckDB => Action::ToJsonConvert,
3855 _ => Action::None,
3856 }
3857 }
3858 Expression::ArrayAgg(ref agg) => {
3859 if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
3860 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
3861 Action::ArrayAggToCollectList
3862 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3863 && matches!(target, DialectType::DuckDB)
3864 && agg.filter.is_some() {
3865 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
3866 // Need to add NOT x IS NULL to existing filter
3867 Action::ArrayAggNullFilter
3868 } else if matches!(target, DialectType::DuckDB)
3869 && agg.ignore_nulls == Some(true)
3870 && !agg.order_by.is_empty() {
3871 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
3872 Action::ArrayAggIgnoreNullsDuckDB
3873 } else if !matches!(source, DialectType::Snowflake) {
3874 Action::None
3875 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3876 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase()) == Some("ARRAY_AGG".to_string())
3877 || agg.name.is_none();
3878 if is_array_agg {
3879 Action::ArrayAggCollectList
3880 } else {
3881 Action::None
3882 }
3883 } else if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) && agg.filter.is_none() {
3884 Action::ArrayAggFilter
3885 } else {
3886 Action::None
3887 }
3888 }
3889 Expression::WithinGroup(wg) => {
3890 if matches!(source, DialectType::Snowflake)
3891 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
3892 && matches!(wg.this, Expression::ArrayAgg(_)) {
3893 Action::ArrayAggWithinGroupFilter
3894 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
3895 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
3896 || matches!(&wg.this, Expression::StringAgg(_)) {
3897 Action::StringAggConvert
3898 } else if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
3899 | DialectType::Spark | DialectType::Databricks)
3900 && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3901 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3902 || matches!(&wg.this, Expression::PercentileCont(_))) {
3903 Action::PercentileContConvert
3904 } else {
3905 Action::None
3906 }
3907 }
3908 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
3909 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
3910 // DATETIME is the timezone-unaware type
3911 Expression::Cast(ref c) => {
3912 if c.format.is_some() && (matches!(source, DialectType::BigQuery) || matches!(source, DialectType::Teradata)) {
3913 Action::BigQueryCastFormat
3914 } else if matches!(target, DialectType::BigQuery)
3915 && !matches!(source, DialectType::BigQuery)
3916 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3917 {
3918 Action::CastTimestampToDatetime
3919 } else if matches!(source,
3920 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3921 ) && matches!(target,
3922 DialectType::Presto | DialectType::Trino | DialectType::Athena
3923 | DialectType::DuckDB | DialectType::Snowflake | DialectType::BigQuery
3924 | DialectType::Databricks | DialectType::TSQL
3925 ) {
3926 Action::HiveCastToTryCast
3927 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
3928 && matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::BigQuery) {
3929 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
3930 Action::CastTimestampStripTz
3931 } else if matches!(&c.to, DataType::Json)
3932 && matches!(&c.this, Expression::Literal(Literal::String(_)))
3933 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::Snowflake) {
3934 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3935 // Only when the input is a string literal (JSON 'value' syntax)
3936 Action::JsonLiteralToJsonParse
3937 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
3938 && matches!(target, DialectType::Spark | DialectType::Databricks) {
3939 // CAST(x AS JSON) -> TO_JSON(x) for Spark
3940 Action::CastToJsonForSpark
3941 } else if (matches!(&c.to, DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }))
3942 && matches!(target, DialectType::Spark | DialectType::Databricks)
3943 && (
3944 matches!(&c.this, Expression::ParseJson(_))
3945 || matches!(
3946 &c.this,
3947 Expression::Function(f)
3948 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
3949 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
3950 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
3951 )
3952 ) {
3953 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
3954 // -> FROM_JSON(..., type_string) for Spark
3955 Action::CastJsonToFromJson
3956 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
3957 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3958 && matches!(source, DialectType::DuckDB) {
3959 Action::StrftimeCastTimestamp
3960 } else if matches!(source, DialectType::DuckDB)
3961 && matches!(c.to, DataType::Decimal { precision: None, .. }) {
3962 Action::DecimalDefaultPrecision
3963 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
3964 && matches!(c.to, DataType::Char { length: None })
3965 && !matches!(target, DialectType::MySQL | DialectType::SingleStore) {
3966 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
3967 Action::MysqlCastCharToText
3968 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3969 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3970 && Self::has_varchar_char_type(&c.to) {
3971 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
3972 Action::SparkCastVarcharToString
3973 } else {
3974 Action::None
3975 }
3976 }
3977 Expression::SafeCast(ref c) => {
3978 if c.format.is_some() && matches!(source, DialectType::BigQuery)
3979 && !matches!(target, DialectType::BigQuery)
3980 {
3981 Action::BigQueryCastFormat
3982 } else {
3983 Action::None
3984 }
3985 }
3986 // For DuckDB: DATE_TRUNC should preserve the input type
3987 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
3988 if matches!(source, DialectType::Snowflake) && matches!(target, DialectType::DuckDB) {
3989 Action::DateTruncWrapCast
3990 } else {
3991 Action::None
3992 }
3993 }
3994 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
3995 Expression::SetStatement(s) => {
3996 if matches!(target, DialectType::DuckDB)
3997 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
3998 && s.items.iter().any(|item| item.kind.is_none()) {
3999 Action::SetToVariable
4000 } else {
4001 Action::None
4002 }
4003 }
4004 // Cross-dialect NULL ordering normalization.
4005 // When nulls_first is not specified, fill in the source dialect's implied
4006 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
4007 Expression::Ordered(o) => {
4008 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
4009 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
4010 Action::MysqlNullsOrdering
4011 } else {
4012 // Skip targets that don't support NULLS FIRST/LAST syntax
4013 let target_supports_nulls = !matches!(target,
4014 DialectType::MySQL | DialectType::TSQL
4015 | DialectType::StarRocks | DialectType::Doris
4016 );
4017 if o.nulls_first.is_none() && source != target && target_supports_nulls {
4018 Action::NullsOrdering
4019 } else {
4020 Action::None
4021 }
4022 }
4023 }
4024 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
4025 Expression::DataType(dt) => {
4026 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4027 match dt {
4028 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") || name.eq_ignore_ascii_case("FLOAT64") || name.eq_ignore_ascii_case("BOOL") || name.eq_ignore_ascii_case("BYTES") || name.eq_ignore_ascii_case("NUMERIC") || name.eq_ignore_ascii_case("STRING") || name.eq_ignore_ascii_case("DATETIME") => Action::BigQueryCastType,
4029 _ => Action::None,
4030 }
4031 } else if matches!(source, DialectType::TSQL) {
4032 // For TSQL source -> any target (including TSQL itself for REAL)
4033 match dt {
4034 // REAL -> FLOAT even for TSQL->TSQL
4035 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL")
4036 => Action::TSQLTypeNormalize,
4037 DataType::Float { real_spelling: true, .. }
4038 => Action::TSQLTypeNormalize,
4039 // Other TSQL type normalizations only for non-TSQL targets
4040 DataType::Custom { ref name } if !matches!(target, DialectType::TSQL) && (
4041 name.eq_ignore_ascii_case("MONEY")
4042 || name.eq_ignore_ascii_case("SMALLMONEY")
4043 || name.eq_ignore_ascii_case("DATETIME2")
4044 || name.eq_ignore_ascii_case("IMAGE")
4045 || name.eq_ignore_ascii_case("BIT")
4046 || name.eq_ignore_ascii_case("ROWVERSION")
4047 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
4048 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
4049 || name.to_uppercase().starts_with("NUMERIC")
4050 || name.to_uppercase().starts_with("DATETIME2(")
4051 || name.to_uppercase().starts_with("TIME(")
4052 ) => Action::TSQLTypeNormalize,
4053 DataType::Float { precision: Some(_), .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
4054 DataType::TinyInt { .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
4055 // INTEGER -> INT for Databricks/Spark targets
4056 DataType::Int { integer_spelling: true, .. } if matches!(target, DialectType::Databricks | DialectType::Spark) => Action::TSQLTypeNormalize,
4057 _ => Action::None,
4058 }
4059 } else if matches!(source, DialectType::Oracle) && !matches!(target, DialectType::Oracle) {
4060 match dt {
4061 DataType::Custom { ref name } if name.to_uppercase().starts_with("VARCHAR2(") || name.to_uppercase().starts_with("NVARCHAR2(") || name.eq_ignore_ascii_case("VARCHAR2") || name.eq_ignore_ascii_case("NVARCHAR2") => Action::OracleVarchar2ToVarchar,
4062 _ => Action::None,
4063 }
4064 } else if matches!(target, DialectType::Snowflake) && !matches!(source, DialectType::Snowflake) {
4065 // When target is Snowflake but source is NOT Snowflake,
4066 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
4067 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
4068 // should keep their FLOAT spelling.
4069 match dt {
4070 DataType::Float { .. } => Action::SnowflakeFloatProtect,
4071 _ => Action::None,
4072 }
4073 } else {
4074 Action::None
4075 }
4076 }
4077 // LOWER patterns from BigQuery TO_HEX conversions:
4078 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
4079 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
4080 Expression::Lower(uf) => {
4081 if matches!(source, DialectType::BigQuery) {
4082 match &uf.this {
4083 Expression::Lower(_) => Action::BigQueryToHexLower,
4084 Expression::Function(f) if f.name == "TO_HEX" && matches!(target, DialectType::BigQuery) => {
4085 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
4086 Action::BigQueryToHexLower
4087 }
4088 _ => Action::None,
4089 }
4090 } else {
4091 Action::None
4092 }
4093 }
4094 // UPPER patterns from BigQuery TO_HEX conversions:
4095 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
4096 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
4097 Expression::Upper(uf) => {
4098 if matches!(source, DialectType::BigQuery) {
4099 match &uf.this {
4100 Expression::Lower(_) => Action::BigQueryToHexUpper,
4101 _ => Action::None,
4102 }
4103 } else {
4104 Action::None
4105 }
4106 }
4107 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
4108 // Snowflake supports LAST_DAY with unit, so keep it there
4109 Expression::LastDay(ld) => {
4110 if matches!(source, DialectType::BigQuery)
4111 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
4112 && ld.unit.is_some()
4113 {
4114 Action::BigQueryLastDayStripUnit
4115 } else {
4116 Action::None
4117 }
4118 }
4119 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
4120 Expression::SafeDivide(_) => {
4121 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4122 Action::BigQuerySafeDivide
4123 } else {
4124 Action::None
4125 }
4126 }
4127 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
4128 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
4129 Expression::AnyValue(ref agg) => {
4130 if matches!(source, DialectType::BigQuery)
4131 && matches!(target, DialectType::DuckDB)
4132 && agg.having_max.is_some()
4133 {
4134 Action::BigQueryAnyValueHaving
4135 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
4136 && !matches!(source, DialectType::Spark | DialectType::Databricks)
4137 && agg.ignore_nulls.is_none()
4138 {
4139 Action::AnyValueIgnoreNulls
4140 } else {
4141 Action::None
4142 }
4143 }
4144 Expression::Any(ref q) => {
4145 if matches!(source, DialectType::PostgreSQL)
4146 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
4147 && q.op.is_some()
4148 && !matches!(q.subquery, Expression::Select(_) | Expression::Subquery(_))
4149 {
4150 Action::AnyToExists
4151 } else {
4152 Action::None
4153 }
4154 }
4155 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
4156 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
4157 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
4158 Expression::RegexpLike(_) if !matches!(source, DialectType::DuckDB)
4159 && matches!(target, DialectType::DuckDB) => {
4160 Action::RegexpLikeToDuckDB
4161 }
4162 // MySQL division -> NULLIF wrapping and/or CAST for specific targets
4163 Expression::Div(ref op) if matches!(source, DialectType::MySQL)
4164 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift
4165 | DialectType::Drill | DialectType::Trino | DialectType::Presto
4166 | DialectType::TSQL | DialectType::Teradata | DialectType::SQLite
4167 | DialectType::BigQuery | DialectType::Snowflake | DialectType::Databricks
4168 | DialectType::Oracle) => {
4169 // Only wrap if RHS is not already NULLIF
4170 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF")) {
4171 Action::MySQLSafeDivide
4172 } else {
4173 Action::None
4174 }
4175 }
4176 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
4177 // For TSQL/Fabric, convert to sp_rename instead
4178 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
4179 if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
4180 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
4181 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
4182 Action::AlterTableToSpRename
4183 } else if new_tbl.schema.is_some()
4184 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
4185 | DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift) {
4186 Action::AlterTableRenameStripSchema
4187 } else {
4188 Action::None
4189 }
4190 } else {
4191 Action::None
4192 }
4193 }
4194 // EPOCH(x) expression -> target-specific epoch conversion
4195 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
4196 Action::EpochConvert
4197 }
4198 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
4199 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
4200 Action::EpochMsConvert
4201 }
4202 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
4203 Expression::StringAgg(_) => {
4204 if matches!(target, DialectType::MySQL | DialectType::SingleStore
4205 | DialectType::Doris | DialectType::StarRocks | DialectType::SQLite)
4206 {
4207 Action::StringAggConvert
4208 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4209 Action::StringAggConvert
4210 } else {
4211 Action::None
4212 }
4213 }
4214 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
4215 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
4216 Expression::GroupConcat(_) => {
4217 Action::GroupConcatConvert
4218 }
4219 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
4220 Expression::Cardinality(_) | Expression::ArrayLength(_) | Expression::ArraySize(_) => {
4221 Action::ArrayLengthConvert
4222 }
4223 // NVL: clear original_name so generator uses dialect-specific function names
4224 Expression::Nvl(f) if f.original_name.is_some() => {
4225 Action::NvlClearOriginal
4226 }
4227 // XOR: expand for dialects that don't support the XOR keyword
4228 Expression::Xor(_) => {
4229 let target_supports_xor = matches!(target,
4230 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris
4231 | DialectType::StarRocks
4232 );
4233 if !target_supports_xor {
4234 Action::XorExpand
4235 } else {
4236 Action::None
4237 }
4238 }
4239 // TSQL #table -> temp table normalization (CREATE TABLE)
4240 Expression::CreateTable(ct)
4241 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4242 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4243 && ct.name.name.name.starts_with('#') => {
4244 Action::TempTableHash
4245 }
4246 // TSQL #table -> strip # from table references in SELECT/etc.
4247 Expression::Table(tr)
4248 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4249 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4250 && tr.name.name.starts_with('#') => {
4251 Action::TempTableHash
4252 }
4253 // TSQL #table -> strip # from DROP TABLE names
4254 Expression::DropTable(ref dt)
4255 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4256 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4257 && dt.names.iter().any(|n| n.name.name.starts_with('#')) => {
4258 Action::TempTableHash
4259 }
4260 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
4261 Expression::JsonExtract(ref f) if !f.arrow_syntax && matches!(target, DialectType::SQLite | DialectType::DuckDB) => {
4262 Action::JsonExtractToArrow
4263 }
4264 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
4265 Expression::JsonExtract(ref f) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
4266 && !matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Materialize)
4267 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with("$.")) => {
4268 Action::JsonExtractToGetJsonObject
4269 }
4270 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
4271 Expression::JsonExtract(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4272 Action::JsonExtractToGetJsonObject
4273 }
4274 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4275 Expression::JsonExtractScalar(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4276 Action::JsonExtractScalarToGetJsonObject
4277 }
4278 // JsonQuery (parsed JSON_QUERY) -> target-specific
4279 Expression::JsonQuery(_) => {
4280 Action::JsonQueryValueConvert
4281 }
4282 // JsonValue (parsed JSON_VALUE) -> target-specific
4283 Expression::JsonValue(_) => {
4284 Action::JsonQueryValueConvert
4285 }
4286 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
4287 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
4288 Expression::AtTimeZone(_) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
4289 | DialectType::Spark | DialectType::Databricks
4290 | DialectType::BigQuery | DialectType::Snowflake) => {
4291 Action::AtTimeZoneConvert
4292 }
4293 // DAY_OF_WEEK -> dialect-specific
4294 Expression::DayOfWeek(_) if matches!(target, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks) => {
4295 Action::DayOfWeekConvert
4296 }
4297 // CURRENT_USER -> CURRENT_USER() for Snowflake
4298 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
4299 Action::CurrentUserParens
4300 }
4301 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4302 Expression::ElementAt(_) if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) => {
4303 Action::ElementAtConvert
4304 }
4305 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
4306 Expression::ArrayFunc(ref arr) if !arr.bracket_notation
4307 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::ClickHouse | DialectType::StarRocks) => {
4308 Action::ArraySyntaxConvert
4309 }
4310 // VARIANCE expression -> varSamp for ClickHouse
4311 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
4312 Action::VarianceToClickHouse
4313 }
4314 // STDDEV expression -> stddevSamp for ClickHouse
4315 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
4316 Action::StddevToClickHouse
4317 }
4318 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4319 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
4320 Action::ApproxQuantileConvert
4321 }
4322 // MonthsBetween -> target-specific
4323 Expression::MonthsBetween(_) if !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4324 Action::MonthsBetweenConvert
4325 }
4326 // AddMonths -> target-specific DATEADD/DATE_ADD
4327 Expression::AddMonths(_) => {
4328 Action::AddMonthsConvert
4329 }
4330 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
4331 Expression::MapFromArrays(_) if !matches!(target, DialectType::Spark | DialectType::Databricks) => {
4332 Action::MapFromArraysConvert
4333 }
4334 // CURRENT_USER -> CURRENT_USER() for Spark
4335 Expression::CurrentUser(_) if matches!(target, DialectType::Spark | DialectType::Databricks) => {
4336 Action::CurrentUserSparkParens
4337 }
4338 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
4339 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
4340 if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
4341 && matches!(&f.this, Expression::Literal(Literal::String(_)))
4342 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::PostgreSQL | DialectType::Redshift) => {
4343 Action::SparkDateFuncCast
4344 }
4345 // $parameter -> @parameter for BigQuery
4346 Expression::Parameter(ref p) if matches!(target, DialectType::BigQuery)
4347 && matches!(source, DialectType::DuckDB)
4348 && (p.style == crate::expressions::ParameterStyle::Dollar || p.style == crate::expressions::ParameterStyle::DoubleDollar) => {
4349 Action::DollarParamConvert
4350 }
4351 // EscapeString literal: normalize literal newlines to \n
4352 Expression::Literal(Literal::EscapeString(ref s)) if s.contains('\n') || s.contains('\r') || s.contains('\t') => {
4353 Action::EscapeStringNormalize
4354 }
4355 // straight_join: keep lowercase for DuckDB, quote for MySQL
4356 Expression::Column(ref col) if col.name.name == "STRAIGHT_JOIN" && col.table.is_none()
4357 && matches!(source, DialectType::DuckDB)
4358 && matches!(target, DialectType::DuckDB | DialectType::MySQL) => {
4359 Action::StraightJoinCase
4360 }
4361 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
4362 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
4363 Expression::Interval(ref iv) if matches!(target, DialectType::Snowflake | DialectType::PostgreSQL | DialectType::Redshift)
4364 && iv.unit.is_some()
4365 && matches!(&iv.this, Some(Expression::Literal(Literal::String(_)))) => {
4366 Action::SnowflakeIntervalFormat
4367 }
4368 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
4369 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
4370 if let Some(ref sample) = ts.sample {
4371 if !sample.explicit_method {
4372 Action::TablesampleReservoir
4373 } else {
4374 Action::None
4375 }
4376 } else {
4377 Action::None
4378 }
4379 }
4380 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
4381 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
4382 Expression::TableSample(ref ts) if matches!(target, DialectType::Snowflake)
4383 && !matches!(source, DialectType::Snowflake)
4384 && ts.sample.is_some() => {
4385 if let Some(ref sample) = ts.sample {
4386 if !sample.explicit_method {
4387 Action::TablesampleSnowflakeStrip
4388 } else {
4389 Action::None
4390 }
4391 } else {
4392 Action::None
4393 }
4394 }
4395 Expression::Table(ref t) if matches!(target, DialectType::Snowflake)
4396 && !matches!(source, DialectType::Snowflake)
4397 && t.table_sample.is_some() => {
4398 if let Some(ref sample) = t.table_sample {
4399 if !sample.explicit_method {
4400 Action::TablesampleSnowflakeStrip
4401 } else {
4402 Action::None
4403 }
4404 } else {
4405 Action::None
4406 }
4407 }
4408 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4409 Expression::AlterTable(ref at) if matches!(target, DialectType::TSQL | DialectType::Fabric)
4410 && !at.actions.is_empty()
4411 && matches!(at.actions.first(), Some(crate::expressions::AlterTableAction::RenameTable(_))) => {
4412 Action::AlterTableToSpRename
4413 }
4414 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
4415 Expression::Subscript(ref sub) if matches!(target, DialectType::BigQuery | DialectType::Hive | DialectType::Spark | DialectType::Databricks)
4416 && matches!(source, DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Presto | DialectType::Trino | DialectType::Redshift | DialectType::ClickHouse)
4417 && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) => {
4418 Action::ArrayIndexConvert
4419 }
4420 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
4421 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
4422 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
4423 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
4424 Expression::WindowFunction(ref wf) => {
4425 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
4426 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
4427 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
4428 if matches!(target, DialectType::BigQuery)
4429 && !is_row_number
4430 && !wf.over.order_by.is_empty()
4431 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some()) {
4432 Action::BigQueryNullsOrdering
4433 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
4434 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
4435 } else {
4436 let source_nulls_last = matches!(source, DialectType::DuckDB);
4437 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
4438 matches!(f.kind, crate::expressions::WindowFrameKind::Range | crate::expressions::WindowFrameKind::Groups)
4439 });
4440 if source_nulls_last && matches!(target, DialectType::MySQL)
4441 && !wf.over.order_by.is_empty()
4442 && wf.over.order_by.iter().any(|o| !o.desc)
4443 && !has_range_frame {
4444 Action::MysqlNullsLastRewrite
4445 } else {
4446 match &wf.this {
4447 Expression::FirstValue(ref vf) | Expression::LastValue(ref vf) if vf.ignore_nulls == Some(false) => {
4448 // RESPECT NULLS
4449 match target {
4450 DialectType::SQLite => Action::RespectNullsConvert,
4451 _ => Action::None,
4452 }
4453 }
4454 _ => Action::None,
4455 }
4456 }
4457 }
4458 }
4459 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4460 Expression::CreateTable(ref ct) if matches!(target, DialectType::DuckDB)
4461 && matches!(source, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4462 let has_comment = ct.columns.iter().any(|c| c.comment.is_some()
4463 || c.constraints.iter().any(|con| matches!(con, crate::expressions::ColumnConstraint::Comment(_)))
4464 );
4465 let has_props = !ct.properties.is_empty();
4466 if has_comment || has_props {
4467 Action::CreateTableStripComment
4468 } else {
4469 Action::None
4470 }
4471 }
4472 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
4473 Expression::Array(_) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
4474 Action::ArrayConcatBracketConvert
4475 }
4476 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
4477 Expression::ArrayFunc(ref arr) if arr.bracket_notation
4478 && matches!(source, DialectType::BigQuery)
4479 && matches!(target, DialectType::Redshift) => {
4480 Action::ArrayConcatBracketConvert
4481 }
4482 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
4483 Expression::BitwiseOrAgg(ref f) | Expression::BitwiseAndAgg(ref f) | Expression::BitwiseXorAgg(ref f) => {
4484 if matches!(target, DialectType::DuckDB) {
4485 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
4486 if let Expression::Cast(ref c) = f.this {
4487 match &c.to {
4488 DataType::Float { .. } | DataType::Double { .. }
4489 | DataType::Decimal { .. } => Action::BitAggFloatCast,
4490 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL") => Action::BitAggFloatCast,
4491 _ => Action::None,
4492 }
4493 } else {
4494 Action::None
4495 }
4496 } else if matches!(target, DialectType::Snowflake) {
4497 Action::BitAggSnowflakeRename
4498 } else {
4499 Action::None
4500 }
4501 }
4502 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
4503 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
4504 Action::FilterToIff
4505 }
4506 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4507 Expression::Avg(ref f) | Expression::Sum(ref f) | Expression::Min(ref f)
4508 | Expression::Max(ref f)
4509 | Expression::CountIf(ref f) | Expression::Stddev(ref f)
4510 | Expression::StddevPop(ref f) | Expression::StddevSamp(ref f)
4511 | Expression::Variance(ref f) | Expression::VarPop(ref f)
4512 | Expression::VarSamp(ref f) | Expression::Median(ref f)
4513 | Expression::Mode(ref f) | Expression::First(ref f) | Expression::Last(ref f)
4514 | Expression::ApproxDistinct(ref f)
4515 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
4516 {
4517 Action::AggFilterToIff
4518 }
4519 Expression::Count(ref c) if c.filter.is_some() && matches!(target, DialectType::Snowflake) => {
4520 Action::AggFilterToIff
4521 }
4522 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
4523 Expression::Count(ref c) if c.distinct && matches!(&c.this, Some(Expression::Tuple(_)))
4524 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB | DialectType::PostgreSQL) => {
4525 Action::CountDistinctMultiArg
4526 }
4527 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4528 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
4529 Action::JsonToGetPath
4530 }
4531 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
4532 Expression::Struct(_) if matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4533 && matches!(source, DialectType::DuckDB) => {
4534 Action::StructToRow
4535 }
4536 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
4537 Expression::MapFunc(ref m) if m.curly_brace_syntax
4538 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4539 && matches!(source, DialectType::DuckDB) => {
4540 Action::StructToRow
4541 }
4542 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4543 Expression::ApproxCountDistinct(_)
4544 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4545 Action::ApproxCountDistinctToApproxDistinct
4546 }
4547 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
4548 Expression::ArrayContains(_)
4549 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake) => {
4550 Action::ArrayContainsConvert
4551 }
4552 // StrPosition with position -> complex expansion for Presto/DuckDB
4553 // STRPOS doesn't support a position arg in these dialects
4554 Expression::StrPosition(ref sp) if sp.position.is_some()
4555 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
4556 Action::StrPositionExpand
4557 }
4558 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4559 Expression::First(ref f) if f.ignore_nulls == Some(true)
4560 && matches!(target, DialectType::DuckDB) => {
4561 Action::FirstToAnyValue
4562 }
4563 // BEGIN -> START TRANSACTION for Presto/Trino
4564 Expression::Command(ref cmd) if cmd.this.eq_ignore_ascii_case("BEGIN")
4565 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4566 // Handled inline below
4567 Action::None // We'll handle it directly
4568 }
4569 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
4570 // PostgreSQL # is parsed as BitwiseXor (which is correct).
4571 // a || b (Concat operator) -> CONCAT function for Presto/Trino
4572 Expression::Concat(ref _op) if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
4573 && matches!(target, DialectType::Presto | DialectType::Trino) => {
4574 Action::PipeConcatToConcat
4575 }
4576 _ => Action::None,
4577 }
4578 };
4579
4580 match action {
4581 Action::None => {
4582 // Handle inline transforms that don't need a dedicated action
4583 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
4584 if let Expression::MethodCall(ref mc) = e {
4585 if matches!(source, DialectType::Oracle)
4586 && mc.method.name.eq_ignore_ascii_case("VALUE")
4587 && mc.args.is_empty()
4588 {
4589 let is_dbms_random = match &mc.this {
4590 Expression::Identifier(id) => id.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4591 Expression::Column(col) => col.table.is_none() && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4592 _ => false,
4593 };
4594 if is_dbms_random {
4595 let func_name = match target {
4596 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB | DialectType::SQLite => "RANDOM",
4597 DialectType::Oracle => "DBMS_RANDOM.VALUE",
4598 _ => "RAND",
4599 };
4600 return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![]))));
4601 }
4602 }
4603 }
4604 // TRIM without explicit position -> add BOTH for ClickHouse
4605 if let Expression::Trim(ref trim) = e {
4606 if matches!(target, DialectType::ClickHouse)
4607 && trim.sql_standard_syntax
4608 && trim.characters.is_some()
4609 && !trim.position_explicit
4610 {
4611 let mut new_trim = (**trim).clone();
4612 new_trim.position_explicit = true;
4613 return Ok(Expression::Trim(Box::new(new_trim)));
4614 }
4615 }
4616 // BEGIN -> START TRANSACTION for Presto/Trino
4617 if let Expression::Transaction(ref txn) = e {
4618 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4619 // Convert BEGIN to START TRANSACTION by setting mark to "START"
4620 let mut txn = txn.clone();
4621 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new("START".to_string()))));
4622 return Ok(Expression::Transaction(Box::new(*txn)));
4623 }
4624 }
4625 // IS TRUE/FALSE -> simplified forms for Presto/Trino
4626 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4627 match &e {
4628 Expression::IsTrue(itf) if !itf.not => {
4629 // x IS TRUE -> x
4630 return Ok(itf.this.clone());
4631 }
4632 Expression::IsTrue(itf) if itf.not => {
4633 // x IS NOT TRUE -> NOT x
4634 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4635 this: itf.this.clone(),
4636 })));
4637 }
4638 Expression::IsFalse(itf) if !itf.not => {
4639 // x IS FALSE -> NOT x
4640 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4641 this: itf.this.clone(),
4642 })));
4643 }
4644 Expression::IsFalse(itf) if itf.not => {
4645 // x IS NOT FALSE -> NOT NOT x
4646 let not_x = Expression::Not(Box::new(crate::expressions::UnaryOp {
4647 this: itf.this.clone(),
4648 }));
4649 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4650 this: not_x,
4651 })));
4652 }
4653 _ => {}
4654 }
4655 }
4656 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
4657 if matches!(target, DialectType::Redshift) {
4658 if let Expression::IsFalse(ref itf) = e {
4659 if itf.not {
4660 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4661 this: Expression::IsFalse(Box::new(crate::expressions::IsTrueFalse {
4662 this: itf.this.clone(),
4663 not: false,
4664 })),
4665 })));
4666 }
4667 }
4668 }
4669 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
4670 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
4671 if let Expression::Function(ref f) = e {
4672 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
4673 && matches!(source, DialectType::Snowflake)
4674 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
4675 {
4676 if f.args.len() == 3 {
4677 let mut args = f.args.clone();
4678 args.push(Expression::string("g"));
4679 return Ok(Expression::Function(Box::new(Function::new(
4680 "REGEXP_REPLACE".to_string(), args,
4681 ))));
4682 } else if f.args.len() == 4 {
4683 // 4th arg might be position, add 'g' as 5th
4684 let mut args = f.args.clone();
4685 args.push(Expression::string("g"));
4686 return Ok(Expression::Function(Box::new(Function::new(
4687 "REGEXP_REPLACE".to_string(), args,
4688 ))));
4689 }
4690 }
4691 }
4692 Ok(e)
4693 }
4694
4695 Action::GreatestLeastNull => {
4696 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4697 let mut null_checks: Vec<Expression> = f.args.iter().map(|a| {
4698 Expression::IsNull(Box::new(IsNull {
4699 this: a.clone(),
4700 not: false,
4701 postfix_form: false,
4702 }))
4703 }).collect();
4704 let condition = if null_checks.len() == 1 {
4705 null_checks.remove(0)
4706 } else {
4707 let first = null_checks.remove(0);
4708 null_checks.into_iter().fold(first, |acc, check| {
4709 Expression::Or(Box::new(BinaryOp::new(acc, check)))
4710 })
4711 };
4712 Ok(Expression::Case(Box::new(Case {
4713 operand: None,
4714 whens: vec![(condition, Expression::Null(Null))],
4715 else_: Some(Expression::Function(Box::new(Function::new(f.name, f.args)))),
4716 })))
4717 }
4718
4719 Action::ArrayGenerateRange => {
4720 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4721 let start = f.args[0].clone();
4722 let end = f.args[1].clone();
4723 let step = f.args.get(2).cloned();
4724
4725 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
4726 end.clone(),
4727 Expression::number(1),
4728 )));
4729
4730 match target {
4731 DialectType::PostgreSQL | DialectType::Redshift => {
4732 let mut args = vec![start, end_minus_1];
4733 if let Some(s) = step { args.push(s); }
4734 Ok(Expression::Function(Box::new(Function::new(
4735 "GENERATE_SERIES".to_string(), args,
4736 ))))
4737 }
4738 DialectType::Presto | DialectType::Trino => {
4739 let mut args = vec![start, end_minus_1];
4740 if let Some(s) = step { args.push(s); }
4741 Ok(Expression::Function(Box::new(Function::new(
4742 "SEQUENCE".to_string(), args,
4743 ))))
4744 }
4745 DialectType::BigQuery => {
4746 let mut args = vec![start, end_minus_1];
4747 if let Some(s) = step { args.push(s); }
4748 Ok(Expression::Function(Box::new(Function::new(
4749 "GENERATE_ARRAY".to_string(), args,
4750 ))))
4751 }
4752 DialectType::Snowflake => {
4753 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
4754 Expression::Paren(Box::new(Paren { this: end_minus_1, trailing_comments: vec![] })),
4755 Expression::number(1),
4756 )));
4757 let mut args = vec![start, normalized_end];
4758 if let Some(s) = step { args.push(s); }
4759 Ok(Expression::Function(Box::new(Function::new(
4760 "ARRAY_GENERATE_RANGE".to_string(), args,
4761 ))))
4762 }
4763 _ => {
4764 Ok(Expression::Function(Box::new(Function::new(f.name, f.args))))
4765 }
4766 }
4767 }
4768
            Action::Div0TypedDivision => {
                // IF(cond, true_val, x / y) — presumably produced from a DIV0-style
                // rewrite (TODO confirm) — gets its dividend cast so the division is
                // performed in floating point: REAL spelling for SQLite, DOUBLE for
                // every other target.
                let if_func = if let Expression::IfFunc(f) = e { *f } else { unreachable!("action only triggered for IfFunc expressions") };
                if let Some(Expression::Div(div)) = if_func.false_value {
                    let cast_type = if matches!(target, DialectType::SQLite) {
                        DataType::Float { precision: None, scale: None, real_spelling: true }
                    } else {
                        DataType::Double { precision: None, scale: None }
                    };
                    // Cast only the left-hand side; that is enough to force a
                    // floating-point division in the generated SQL.
                    let casted_left = Expression::Cast(Box::new(Cast {
                        this: div.left,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: if_func.condition,
                        true_value: if_func.true_value,
                        false_value: Some(Expression::Div(Box::new(BinaryOp::new(casted_left, div.right)))),
                        original_name: if_func.original_name,
                    })))
                } else {
                    // False branch is not actually a Div: reconstruct unchanged.
                    Ok(Expression::IfFunc(Box::new(if_func)))
                }
            }
4796
4797 Action::ArrayAggCollectList => {
4798 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4799 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4800 name: Some("COLLECT_LIST".to_string()),
4801 ..agg
4802 })))
4803 }
4804
            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) -> plain ARRAY_AGG with
                // the ordering moved into the aggregate and a
                // FILTER (WHERE x IS NOT NULL) attached.
                let wg = if let Expression::WithinGroup(w) = e { *w } else { unreachable!("action only triggered for WithinGroup expressions") };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering —
                    // presumably because DuckDB's default NULL placement differs
                    // from the source's; TODO confirm against generator tests.
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by.into_iter().map(|mut o| {
                            if o.desc && o.nulls_first.is_none() {
                                o.nulls_first = Some(true);
                            }
                            o
                        }).collect()
                    } else {
                        wg.order_by
                    };
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                    })))
                } else {
                    // WITHIN GROUP over something other than ARRAY_AGG: untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }
4839
4840 Action::ArrayAggFilter => {
4841 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4842 let col = agg.this.clone();
4843 let filter = Expression::IsNull(Box::new(IsNull {
4844 this: col,
4845 not: true,
4846 postfix_form: false,
4847 }));
4848 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4849 filter: Some(filter),
4850 ..agg
4851 })))
4852 }
4853
            Action::ArrayAggNullFilter => {
                // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
                // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
                let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
                let col = agg.this.clone();
                let not_null = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
                }));
                // Merge with any pre-existing FILTER clause rather than replacing it.
                let new_filter = if let Some(existing_filter) = agg.filter {
                    // AND the NOT IS NULL with existing filter
                    Expression::And(Box::new(crate::expressions::BinaryOp::new(
                        existing_filter,
                        not_null,
                    )))
                } else {
                    not_null
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(new_filter),
                    ..agg
                })))
            }
4878
            Action::BigQueryArraySelectAsStructToSnowflake => {
                // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                if let Expression::Function(mut f) = e {
                    // Only fires for ARRAY(<select>) whose subquery is SELECT AS STRUCT.
                    let is_match = f.args.len() == 1 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                    if is_match {
                        let inner_select = match f.args.remove(0) {
                            Expression::Select(s) => *s,
                            _ => unreachable!("argument already verified to be a Select expression"),
                        };
                        // Build OBJECT_CONSTRUCT args from SELECT expressions:
                        // aliased items contribute ('alias', value) pairs, bare
                        // columns contribute ('name', column) pairs, and any other
                        // expression is passed through positionally.
                        let mut oc_args = Vec::new();
                        for expr in &inner_select.expressions {
                            match expr {
                                Expression::Alias(a) => {
                                    let key = Expression::Literal(Literal::String(a.alias.name.clone()));
                                    let value = a.this.clone();
                                    oc_args.push(key);
                                    oc_args.push(value);
                                }
                                Expression::Column(c) => {
                                    let key = Expression::Literal(Literal::String(c.name.name.clone()));
                                    oc_args.push(key);
                                    oc_args.push(expr.clone());
                                }
                                _ => {
                                    oc_args.push(expr.clone());
                                }
                            }
                        }
                        let object_construct = Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(), oc_args,
                        )));
                        let array_agg = Expression::Function(Box::new(Function::new(
                            "ARRAY_AGG".to_string(), vec![object_construct],
                        )));
                        // Rebuild a SELECT around the aggregate, carrying over the
                        // relevant clauses of the original subquery.
                        let mut new_select = crate::expressions::Select::new();
                        new_select.expressions = vec![array_agg];
                        new_select.from = inner_select.from.clone();
                        new_select.where_clause = inner_select.where_clause.clone();
                        new_select.group_by = inner_select.group_by.clone();
                        new_select.having = inner_select.having.clone();
                        new_select.joins = inner_select.joins.clone();
                        Ok(Expression::Subquery(Box::new(crate::expressions::Subquery {
                            this: Expression::Select(Box::new(new_select)),
                            alias: None,
                            column_aliases: Vec::new(),
                            order_by: None,
                            limit: None,
                            offset: None,
                            distribute_by: None,
                            sort_by: None,
                            cluster_by: None,
                            lateral: false,
                            modifiers_inside: false,
                            trailing_comments: Vec::new(),
                        })))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
4943
4944 Action::BigQueryPercentileContToDuckDB => {
4945 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
4946 if let Expression::AggregateFunction(mut af) = e {
4947 af.name = "QUANTILE_CONT".to_string();
4948 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
4949 // Keep only first 2 args
4950 if af.args.len() > 2 {
4951 af.args.truncate(2);
4952 }
4953 Ok(Expression::AggregateFunction(af))
4954 } else {
4955 Ok(e)
4956 }
4957 }
4958
4959 Action::ArrayAggIgnoreNullsDuckDB => {
4960 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
4961 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
4962 let mut agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4963 agg.ignore_nulls = None; // Strip IGNORE NULLS
4964 if !agg.order_by.is_empty() {
4965 agg.order_by[0].nulls_first = Some(true);
4966 }
4967 Ok(Expression::ArrayAgg(Box::new(agg)))
4968 }
4969
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                // Emulates multi-argument DISTINCT counting on targets that only
                // accept one argument: the CASE yields NULL (which COUNT skips)
                // whenever any component is NULL, otherwise a row tuple.
                if let Expression::Count(c) = e {
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr = Expression::Tuple(Box::new(crate::expressions::Tuple {
                            expressions: args,
                        }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                        }));
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                        })))
                    } else {
                        // Single-argument COUNT or COUNT(*): nothing to rewrite.
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }
5012
5013 Action::CastTimestampToDatetime => {
5014 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
5015 Ok(Expression::Cast(Box::new(Cast {
5016 to: DataType::Custom { name: "DATETIME".to_string() },
5017 ..c
5018 })))
5019 }
5020
5021 Action::CastTimestampStripTz => {
5022 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
5023 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
5024 Ok(Expression::Cast(Box::new(Cast {
5025 to: DataType::Timestamp { precision: None, timezone: false },
5026 ..c
5027 })))
5028 }
5029
5030 Action::ToDateToCast => {
5031 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
5032 if let Expression::Function(f) = e {
5033 let arg = f.args.into_iter().next().unwrap();
5034 Ok(Expression::Cast(Box::new(Cast {
5035 this: arg,
5036 to: DataType::Date,
5037 double_colon_syntax: false,
5038 trailing_comments: vec![],
5039 format: None,
5040 default: None,
5041 })))
5042 } else {
5043 Ok(e)
5044 }
5045 }
            Action::DateTruncWrapCast => {
                // Preserve the operand's cast type across DATE_TRUNC by re-casting
                // the truncation result to that type. TIME operands are special:
                // they are anchored to DATE '1970-01-01' (added to the time) before
                // truncating, then cast back to TIME.
                // Handles both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr])
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Only rewrite when the operand is itself an explicit CAST.
                        // NOTE(review): a TimestampTrunc input is re-emitted as
                        // DateTrunc below — confirm the variant loss is intended.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Epoch-anchoring: DATE '1970-01-01' + time, truncate,
                                // then cast back to the original TIME type.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc { this: add_expr, unit: d.unit }));
                                Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                            } else {
                                // Non-TIME: simply wrap the truncation in a cast back
                                // to the operand's declared type.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                            }
                        } else {
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same epoch-anchoring trick as above for TIME operands.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, time_expr)));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast { this: Expression::Function(f), to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    other => Ok(other),
                }
            }
5114
5115 Action::RegexpReplaceSnowflakeToDuckDB => {
5116 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
5117 if let Expression::Function(f) = e {
5118 let mut args = f.args;
5119 let subject = args.remove(0);
5120 let pattern = args.remove(0);
5121 let replacement = args.remove(0);
5122 Ok(Expression::Function(Box::new(Function::new(
5123 "REGEXP_REPLACE".to_string(),
5124 vec![subject, pattern, replacement, Expression::Literal(crate::expressions::Literal::String("g".to_string()))],
5125 ))))
5126 } else {
5127 Ok(e)
5128 }
5129 }
5130
5131 Action::SetToVariable => {
5132 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
5133 if let Expression::SetStatement(mut s) = e {
5134 for item in &mut s.items {
5135 if item.kind.is_none() {
5136 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
5137 let already_variable = match &item.name {
5138 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
5139 _ => false,
5140 };
5141 if already_variable {
5142 // Extract the actual name and set kind
5143 if let Expression::Identifier(ref mut id) = item.name {
5144 let actual_name = id.name["VARIABLE ".len()..].to_string();
5145 id.name = actual_name;
5146 }
5147 }
5148 item.kind = Some("VARIABLE".to_string());
5149 }
5150 }
5151 Ok(Expression::SetStatement(s))
5152 } else {
5153 Ok(e)
5154 }
5155 }
5156
5157 Action::ConvertTimezoneToExpr => {
5158 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
5159 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
5160 if let Expression::Function(f) = e {
5161 if f.args.len() == 2 {
5162 let mut args = f.args;
5163 let target_tz = args.remove(0);
5164 let timestamp = args.remove(0);
5165 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
5166 source_tz: None,
5167 target_tz: Some(Box::new(target_tz)),
5168 timestamp: Some(Box::new(timestamp)),
5169 options: vec![],
5170 })))
5171 } else if f.args.len() == 3 {
5172 let mut args = f.args;
5173 let source_tz = args.remove(0);
5174 let target_tz = args.remove(0);
5175 let timestamp = args.remove(0);
5176 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
5177 source_tz: Some(Box::new(source_tz)),
5178 target_tz: Some(Box::new(target_tz)),
5179 timestamp: Some(Box::new(timestamp)),
5180 options: vec![],
5181 })))
5182 } else {
5183 Ok(Expression::Function(f))
5184 }
5185 } else {
5186 Ok(e)
5187 }
5188 }
5189
5190 Action::BigQueryCastType => {
5191 // Convert BigQuery types to standard SQL types
5192 if let Expression::DataType(dt) = e {
5193 match dt {
5194 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
5195 Ok(Expression::DataType(DataType::BigInt { length: None }))
5196 }
5197 DataType::Custom { ref name } if name.eq_ignore_ascii_case("FLOAT64") => {
5198 Ok(Expression::DataType(DataType::Double { precision: None, scale: None }))
5199 }
5200 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
5201 Ok(Expression::DataType(DataType::Boolean))
5202 }
5203 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
5204 Ok(Expression::DataType(DataType::VarBinary { length: None }))
5205 }
5206 DataType::Custom { ref name } if name.eq_ignore_ascii_case("NUMERIC") => {
5207 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
5208 // default precision (18, 3) being added to bare DECIMAL
5209 if matches!(target, DialectType::DuckDB) {
5210 Ok(Expression::DataType(DataType::Custom { name: "DECIMAL".to_string() }))
5211 } else {
5212 Ok(Expression::DataType(DataType::Decimal { precision: None, scale: None }))
5213 }
5214 }
5215 DataType::Custom { ref name } if name.eq_ignore_ascii_case("STRING") => {
5216 Ok(Expression::DataType(DataType::String { length: None }))
5217 }
5218 DataType::Custom { ref name } if name.eq_ignore_ascii_case("DATETIME") => {
5219 Ok(Expression::DataType(DataType::Timestamp { precision: None, timezone: false }))
5220 }
5221 _ => Ok(Expression::DataType(dt)),
5222 }
5223 } else {
5224 Ok(e)
5225 }
5226 }
5227
5228 Action::BigQuerySafeDivide => {
5229 // Convert SafeDivide expression to IF/CASE form for most targets
5230 if let Expression::SafeDivide(sd) = e {
5231 let x = *sd.this;
5232 let y = *sd.expression;
5233 // Wrap x and y in parens if they're complex expressions
5234 let y_ref = match &y {
5235 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
5236 _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
5237 };
5238 let x_ref = match &x {
5239 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
5240 _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
5241 };
5242 let condition = Expression::Neq(Box::new(BinaryOp::new(y_ref.clone(), Expression::number(0))));
5243 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
5244
5245 if matches!(target, DialectType::Presto | DialectType::Trino) {
5246 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
5247 let cast_x = Expression::Cast(Box::new(Cast {
5248 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5249 to: DataType::Double { precision: None, scale: None },
5250 trailing_comments: vec![],
5251 double_colon_syntax: false,
5252 format: None,
5253 default: None,
5254 }));
5255 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) })));
5256 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5257 condition,
5258 true_value: cast_div,
5259 false_value: Some(Expression::Null(Null)),
5260 original_name: None,
5261 })))
5262 } else if matches!(target, DialectType::PostgreSQL) {
5263 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
5264 let cast_x = Expression::Cast(Box::new(Cast {
5265 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5266 to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
5267 trailing_comments: vec![],
5268 double_colon_syntax: false,
5269 format: None,
5270 default: None,
5271 }));
5272 let y_paren = match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) };
5273 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
5274 Ok(Expression::Case(Box::new(Case {
5275 operand: None,
5276 whens: vec![(condition, cast_div)],
5277 else_: Some(Expression::Null(Null)),
5278 })))
5279 } else if matches!(target, DialectType::DuckDB) {
5280 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
5281 Ok(Expression::Case(Box::new(Case {
5282 operand: None,
5283 whens: vec![(condition, div_expr)],
5284 else_: Some(Expression::Null(Null)),
5285 })))
5286 } else if matches!(target, DialectType::Snowflake) {
5287 // Snowflake: IFF(y <> 0, x / y, NULL)
5288 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5289 condition,
5290 true_value: div_expr,
5291 false_value: Some(Expression::Null(Null)),
5292 original_name: Some("IFF".to_string()),
5293 })))
5294 } else {
5295 // All others: IF(y <> 0, x / y, NULL)
5296 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5297 condition,
5298 true_value: div_expr,
5299 false_value: Some(Expression::Null(Null)),
5300 original_name: None,
5301 })))
5302 }
5303 } else {
5304 Ok(e)
5305 }
5306 }
5307
            Action::BigQueryLastDayStripUnit => {
                // BigQuery LAST_DAY(date[, MONTH]) -> target-specific equivalent,
                // always dropping the explicit unit first.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String("MONTH".to_string())),
                                    ld.this.clone(),
                                ],
                            )));
                            let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                date_trunc,
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(Expression::Literal(crate::expressions::Literal::String("1 MONTH".to_string()))),
                                    unit: None,
                                })),
                            )));
                            let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                plus_month,
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(Expression::Literal(crate::expressions::Literal::String("1 DAY".to_string()))),
                                    unit: None,
                                })),
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Custom { name: "Nullable(DATE)".to_string() };
                                    Expression::Cast(c)
                                }
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // Every other target keeps LAST_DAY with the unit stripped.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
5370
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => {
                        (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), false)
                    }
                    Expression::SafeCast(ref c) if c.format.is_some() => {
                        (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), true)
                    }
                    // No FORMAT clause: nothing to rewrite.
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the SQL FORMAT picture (e.g. 'MM/DD/YYYY') to a
                // strftime-style pattern (e.g. '%m/%d/%Y').
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), func_args))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(parse_fn_name.to_string(), vec![this, duck_fmt])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Other targets: keep the original CAST ... FORMAT expression.
                    _ => Ok(e),
                }
            }
5436
            Action::BigQueryFunctionNormalize => {
                // Delegate to the shared BigQuery function normalization helper.
                Self::normalize_bigquery_function(e, source, target)
            }
5440
            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function.
                // Deliberately a no-op pass-through.
                Ok(e)
            }
5445
            Action::BigQueryToHexLower => {
                // Clean up LOWER(...) wrappers around hex-conversion output.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        // NOTE(review): this name check is case-sensitive, unlike the
                        // eq_ignore_ascii_case comparisons used elsewhere — confirm
                        // the name is already normalized by this point.
                        Expression::Function(f) if matches!(target, DialectType::BigQuery) && f.name == "TO_HEX" => {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Anything else: re-wrap in LOWER unchanged.
                        other => Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc { this: other, original_name: None })))
                    }
                } else {
                    Ok(e)
                }
            }
5473
            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args)));
                                Ok(Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(restored))))
                            } else {
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // Operand is not LOWER(...): leave the UPPER untouched.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
5499
                    Action::BigQueryAnyValueHaving => {
                        // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
                        // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
                        if let Expression::AnyValue(agg) = e {
                            // having_max carries (expression, is_max); is_max picks MAX vs MIN.
                            if let Some((having_expr, is_max)) = agg.having_max {
                                let func_name = if is_max { "ARG_MAX_NULL" } else { "ARG_MIN_NULL" };
                                Ok(Expression::Function(Box::new(Function::new(
                                    func_name.to_string(),
                                    vec![agg.this, *having_expr],
                                ))))
                            } else {
                                // Plain ANY_VALUE without a HAVING clause: pass through untouched.
                                Ok(Expression::AnyValue(agg))
                            }
                        } else {
                            Ok(e)
                        }
                    }
5517
5518 Action::BigQueryApproxQuantiles => {
5519 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
5520 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
5521 if let Expression::AggregateFunction(agg) = e {
5522 if agg.args.len() >= 2 {
5523 let x_expr = agg.args[0].clone();
5524 let n_expr = &agg.args[1];
5525
5526 // Extract the numeric value from n_expr
5527 let n = match n_expr {
5528 Expression::Literal(crate::expressions::Literal::Number(s)) => s.parse::<usize>().unwrap_or(2),
5529 _ => 2,
5530 };
5531
5532 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
5533 let mut quantiles = Vec::new();
5534 for i in 0..=n {
5535 let q = i as f64 / n as f64;
5536 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
5537 if q == 0.0 {
5538 quantiles.push(Expression::number(0));
5539 } else if q == 1.0 {
5540 quantiles.push(Expression::number(1));
5541 } else {
5542 quantiles.push(Expression::Literal(crate::expressions::Literal::Number(format!("{}", q))));
5543 }
5544 }
5545
5546 let array_expr = Expression::Array(Box::new(crate::expressions::Array {
5547 expressions: quantiles,
5548 }));
5549
5550 // Preserve DISTINCT modifier
5551 let mut new_func = Function::new("APPROX_QUANTILE".to_string(), vec![x_expr, array_expr]);
5552 new_func.distinct = agg.distinct;
5553 Ok(Expression::Function(Box::new(new_func)))
5554 } else {
5555 Ok(Expression::AggregateFunction(agg))
5556 }
5557 } else {
5558 Ok(e)
5559 }
5560 }
5561
5562 Action::GenericFunctionNormalize => {
5563 // Helper closure to convert ARBITRARY to target-specific function
5564 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
5565 let name = match target {
5566 DialectType::ClickHouse => "any",
5567 DialectType::TSQL | DialectType::SQLite => "MAX",
5568 DialectType::Hive => "FIRST",
5569 DialectType::Presto | DialectType::Trino | DialectType::Athena => "ARBITRARY",
5570 _ => "ANY_VALUE",
5571 };
5572 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
5573 }
5574
                        if let Expression::Function(f) = e {
                            // Dispatch on the upper-cased function name; f.name keeps the
                            // original casing for targets that must preserve it.
                            let name = f.name.to_uppercase();
                            match name.as_str() {
                                "ARBITRARY" if f.args.len() == 1 => {
                                    let arg = f.args.into_iter().next().unwrap();
                                    Ok(convert_arbitrary(arg, target))
                                }
                                "TO_NUMBER" if f.args.len() == 1 => {
                                    let arg = f.args.into_iter().next().unwrap();
                                    match target {
                                        DialectType::Oracle | DialectType::Snowflake => {
                                            // Both dialects support TO_NUMBER natively.
                                            Ok(Expression::Function(Box::new(Function::new("TO_NUMBER".to_string(), vec![arg]))))
                                        }
                                        _ => {
                                            // NOTE(review): other targets get CAST(x AS DOUBLE);
                                            // confirm DOUBLE (vs DECIMAL) is the intended default.
                                            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                                this: arg,
                                                to: crate::expressions::DataType::Double { precision: None, scale: None },
                                                double_colon_syntax: false,
                                                trailing_comments: Vec::new(),
                                                format: None,
                                                default: None,
                                            })))
                                        }
                                    }
                                }
                                // Spark-style AGGREGATE(arr, init, merge[, finish]) is spelled
                                // REDUCE on these targets.
                                "AGGREGATE" if f.args.len() >= 3 => {
                                    match target {
                                        DialectType::DuckDB | DialectType::Hive | DialectType::Presto | DialectType::Trino => {
                                            Ok(Expression::Function(Box::new(Function::new("REDUCE".to_string(), f.args))))
                                        }
                                        _ => Ok(Expression::Function(f)),
                                    }
                                }
5608 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
5609 "REGEXP_MATCHES" if f.args.len() >= 2 => {
5610 if matches!(target, DialectType::DuckDB) {
5611 Ok(Expression::Function(f))
5612 } else {
5613 let mut args = f.args;
5614 let this = args.remove(0);
5615 let pattern = args.remove(0);
5616 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5617 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5618 this,
5619 pattern,
5620 flags,
5621 })))
5622 }
5623 }
5624 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
5625 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
5626 if matches!(target, DialectType::DuckDB) {
5627 Ok(Expression::Function(f))
5628 } else {
5629 let mut args = f.args;
5630 let this = args.remove(0);
5631 let pattern = args.remove(0);
5632 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5633 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5634 this,
5635 pattern,
5636 flags,
5637 })))
5638 }
5639 }
                                // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
                                "STRUCT_EXTRACT" if f.args.len() == 2 => {
                                    let mut args = f.args;
                                    let this = args.remove(0);
                                    let field_expr = args.remove(0);
                                    // Extract string literal to get field name
                                    let field_name = match &field_expr {
                                        Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        // Dynamic field expressions cannot become dot access;
                                        // keep the original function-call form.
                                        _ => return Ok(Expression::Function(Box::new(Function::new("STRUCT_EXTRACT".to_string(), vec![this, field_expr])))),
                                    };
                                    Ok(Expression::StructExtract(Box::new(crate::expressions::StructExtractFunc {
                                        this,
                                        field: crate::expressions::Identifier::new(field_name),
                                    })))
                                }
                                // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
                                "LIST_FILTER" if f.args.len() == 2 => {
                                    let name = match target {
                                        DialectType::DuckDB => "LIST_FILTER",
                                        _ => "FILTER",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
                                "LIST_TRANSFORM" if f.args.len() == 2 => {
                                    let name = match target {
                                        DialectType::DuckDB => "LIST_TRANSFORM",
                                        _ => "TRANSFORM",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
                                "LIST_SORT" if f.args.len() >= 1 => {
                                    let name = match target {
                                        DialectType::DuckDB | DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
                                        _ => "SORT_ARRAY",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                                "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
                                    match target {
                                        DialectType::DuckDB => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                            // SORT_ARRAY's second argument FALSE means descending order.
                                            let mut args = f.args;
                                            args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
                                            Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
                                        }
                                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                            // The comparator inverts natural order, so the sort is descending.
                                            let arr = f.args.into_iter().next().unwrap();
                                            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                                parameters: vec![
                                                    crate::expressions::Identifier::new("a"),
                                                    crate::expressions::Identifier::new("b"),
                                                ],
                                                body: Expression::Case(Box::new(Case {
                                                    operand: None,
                                                    whens: vec![
                                                        (
                                                            Expression::Lt(Box::new(BinaryOp::new(
                                                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                            ))),
                                                            Expression::number(1),
                                                        ),
                                                        (
                                                            Expression::Gt(Box::new(BinaryOp::new(
                                                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                            ))),
                                                            Expression::Literal(Literal::Number("-1".to_string())),
                                                        ),
                                                    ],
                                                    else_: Some(Expression::number(0)),
                                                })),
                                                colon: false,
                                                parameter_types: Vec::new(),
                                            }));
                                            Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("LIST_REVERSE_SORT".to_string(), f.args)))),
                                    }
                                }
5725 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
5726 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
5727 let mut args = f.args;
5728 args.push(Expression::string(","));
5729 let name = match target {
5730 DialectType::DuckDB => "STR_SPLIT",
5731 DialectType::Presto | DialectType::Trino => "SPLIT",
5732 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
5733 DialectType::PostgreSQL => "STRING_TO_ARRAY",
5734 DialectType::Redshift => "SPLIT_TO_ARRAY",
5735 _ => "SPLIT",
5736 };
5737 Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
5738 }
5739 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
5740 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
5741 let name = match target {
5742 DialectType::DuckDB => "STR_SPLIT",
5743 DialectType::Presto | DialectType::Trino => "SPLIT",
5744 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
5745 DialectType::PostgreSQL => "STRING_TO_ARRAY",
5746 DialectType::Redshift => "SPLIT_TO_ARRAY",
5747 _ => "SPLIT",
5748 };
5749 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
5750 }
                                // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
                                "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
                                    let name = match target {
                                        DialectType::DuckDB => "STR_SPLIT",
                                        DialectType::Presto | DialectType::Trino => "SPLIT",
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
                                        DialectType::Doris | DialectType::StarRocks => "SPLIT_BY_STRING",
                                        DialectType::PostgreSQL | DialectType::Redshift => "STRING_TO_ARRAY",
                                        _ => "SPLIT",
                                    };
                                    // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
                                    if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                                        let mut args = f.args;
                                        let x = args.remove(0);
                                        let sep = args.remove(0);
                                        // Wrap separator in CONCAT('\\Q', sep, '\\E')
                                        let escaped_sep = Expression::Function(Box::new(Function::new(
                                            "CONCAT".to_string(),
                                            vec![
                                                Expression::string("\\Q"),
                                                sep,
                                                Expression::string("\\E"),
                                            ],
                                        )));
                                        Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![x, escaped_sep]))))
                                    } else {
                                        Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                    }
                                }
                                // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
                                // (Spark/Hive SPLIT is already regex-based, so no escaping here.)
                                "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
                                    let name = match target {
                                        DialectType::DuckDB => "STR_SPLIT_REGEX",
                                        DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
                                        _ => "REGEXP_SPLIT",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
                                "SPLIT" if f.args.len() == 2
                                    && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena
                                        | DialectType::StarRocks | DialectType::Doris)
                                    && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
                                    // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
                                    let mut args = f.args;
                                    let x = args.remove(0);
                                    let sep = args.remove(0);
                                    // \Q...\E quotes the separator so it is treated literally.
                                    let escaped_sep = Expression::Function(Box::new(Function::new(
                                        "CONCAT".to_string(),
                                        vec![
                                            Expression::string("\\Q"),
                                            sep,
                                            Expression::string("\\E"),
                                        ],
                                    )));
                                    Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![x, escaped_sep]))))
                                }
                                // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
                                // For ClickHouse target, preserve original name to maintain camelCase
                                "SUBSTRINGINDEX" => {
                                    let name = if matches!(target, DialectType::ClickHouse) {
                                        f.name.clone()
                                    } else {
                                        "SUBSTRING_INDEX".to_string()
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name, f.args))))
                                }
                                // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
                                "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
                                    // Get the array argument (first arg, drop dimension args)
                                    let mut args = f.args;
                                    let arr = if args.is_empty() {
                                        // No arguments: re-emit under the normalized (upper-cased) name.
                                        return Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))));
                                    } else {
                                        args.remove(0)
                                    };
                                    // Shadows the outer `name` with the target-specific spelling.
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SIZE",
                                        DialectType::Presto | DialectType::Trino => "CARDINALITY",
                                        DialectType::BigQuery => "ARRAY_LENGTH",
                                        DialectType::DuckDB => {
                                            // DuckDB: use ARRAY_LENGTH with all args
                                            let mut all_args = vec![arr];
                                            all_args.extend(args);
                                            return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
                                        }
                                        DialectType::PostgreSQL | DialectType::Redshift => {
                                            // Keep ARRAY_LENGTH with dimension arg
                                            let mut all_args = vec![arr];
                                            all_args.extend(args);
                                            return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
                                        }
                                        DialectType::ClickHouse => "LENGTH",
                                        _ => "ARRAY_LENGTH",
                                    };
                                    // All other targets take only the array argument.
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arr]))))
                                }
5848 }
5849 // UNICODE(x) -> target-specific codepoint function
5850 "UNICODE" if f.args.len() == 1 => {
5851 match target {
5852 DialectType::SQLite | DialectType::DuckDB => {
5853 Ok(Expression::Function(Box::new(Function::new("UNICODE".to_string(), f.args))))
5854 }
5855 DialectType::Oracle => {
5856 // ASCII(UNISTR(x))
5857 let inner = Expression::Function(Box::new(Function::new("UNISTR".to_string(), f.args)));
5858 Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), vec![inner]))))
5859 }
5860 DialectType::MySQL => {
5861 // ORD(CONVERT(x USING utf32))
5862 let arg = f.args.into_iter().next().unwrap();
5863 let convert_expr = Expression::ConvertToCharset(Box::new(crate::expressions::ConvertToCharset {
5864 this: Box::new(arg),
5865 dest: Some(Box::new(Expression::Identifier(crate::expressions::Identifier::new("utf32")))),
5866 source: None,
5867 }));
5868 Ok(Expression::Function(Box::new(Function::new("ORD".to_string(), vec![convert_expr]))))
5869 }
5870 _ => {
5871 Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), f.args))))
5872 }
5873 }
5874 }
                                // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
                                "XOR" if f.args.len() >= 2 => {
                                    match target {
                                        DialectType::ClickHouse => {
                                            // ClickHouse: keep as xor() function with lowercase name
                                            Ok(Expression::Function(Box::new(Function::new("xor".to_string(), f.args))))
                                        }
                                        DialectType::Presto | DialectType::Trino => {
                                            if f.args.len() == 2 {
                                                Ok(Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), f.args))))
                                            } else {
                                                // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
                                                // (left-fold over the remaining arguments)
                                                let mut args = f.args;
                                                let first = args.remove(0);
                                                let second = args.remove(0);
                                                let mut result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![first, second])));
                                                for arg in args {
                                                    result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![result, arg])));
                                                }
                                                Ok(result)
                                            }
                                        }
                                        DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                                            // Convert XOR(a, b, c) -> Expression::Xor with expressions list
                                            let args = f.args;
                                            Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                                                this: None,
                                                expression: None,
                                                expressions: args,
                                            })))
                                        }
                                        DialectType::PostgreSQL | DialectType::Redshift => {
                                            // PostgreSQL: a # b (hash operator for XOR)
                                            let mut args = f.args;
                                            let first = args.remove(0);
                                            let second = args.remove(0);
                                            let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
                                            for arg in args {
                                                result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
                                            }
                                            Ok(result)
                                        }
                                        DialectType::DuckDB => {
                                            // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
                                            Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args))))
                                        }
                                        DialectType::BigQuery => {
                                            // BigQuery: a ^ b (caret operator for XOR)
                                            let mut args = f.args;
                                            let first = args.remove(0);
                                            let second = args.remove(0);
                                            let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
                                            for arg in args {
                                                result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
                                            }
                                            Ok(result)
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args)))),
                                    }
                                }
                                // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                                "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
                                    match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                            // SORT_ARRAY's second argument FALSE means descending order.
                                            let mut args = f.args;
                                            args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
                                            Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
                                        }
                                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                            // The comparator inverts natural order, so the sort is descending.
                                            let arr = f.args.into_iter().next().unwrap();
                                            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                                parameters: vec![
                                                    Identifier::new("a"),
                                                    Identifier::new("b"),
                                                ],
                                                colon: false,
                                                parameter_types: Vec::new(),
                                                body: Expression::Case(Box::new(Case {
                                                    operand: None,
                                                    whens: vec![
                                                        (
                                                            Expression::Lt(Box::new(BinaryOp::new(
                                                                Expression::Identifier(Identifier::new("a")),
                                                                Expression::Identifier(Identifier::new("b")),
                                                            ))),
                                                            Expression::number(1),
                                                        ),
                                                        (
                                                            Expression::Gt(Box::new(BinaryOp::new(
                                                                Expression::Identifier(Identifier::new("a")),
                                                                Expression::Identifier(Identifier::new("b")),
                                                            ))),
                                                            // NOTE(review): built as Neg(1) here but as the
                                                            // literal "-1" in the LIST_REVERSE_SORT arm -
                                                            // generated SQL should be checked for consistency.
                                                            Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                                                this: Expression::number(1),
                                                            })),
                                                        ),
                                                    ],
                                                    else_: Some(Expression::number(0)),
                                                })),
                                            }));
                                            Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
                                    }
                                }
                                // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
                                "ENCODE" if f.args.len() == 1 => {
                                    match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                            // Spark/Hive ENCODE requires an explicit charset argument.
                                            let mut args = f.args;
                                            args.push(Expression::string("utf-8"));
                                            Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
                                        }
                                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                            Ok(Expression::Function(Box::new(Function::new("TO_UTF8".to_string(), f.args))))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), f.args)))),
                                    }
                                }
                                // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
                                "DECODE" if f.args.len() == 1 => {
                                    match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                            // Spark/Hive DECODE requires an explicit charset argument.
                                            let mut args = f.args;
                                            args.push(Expression::string("utf-8"));
                                            Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
                                        }
                                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                            Ok(Expression::Function(Box::new(Function::new("FROM_UTF8".to_string(), f.args))))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), f.args)))),
                                    }
                                }
                                // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
                                "QUANTILE" if f.args.len() == 2 => {
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE",
                                        DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
                                        DialectType::BigQuery => "PERCENTILE_CONT",
                                        _ => "QUANTILE",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                                "QUANTILE_CONT" if f.args.len() == 2 => {
                                    let mut args = f.args;
                                    let column = args.remove(0);
                                    let quantile = args.remove(0);
                                    match target {
                                        DialectType::DuckDB => {
                                            // DuckDB supports the function form directly.
                                            Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile]))))
                                        }
                                        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
                                            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
                                            // `column` is cloned: once for the percentile node,
                                            // once for the ORDER BY key.
                                            let inner = Expression::PercentileCont(Box::new(crate::expressions::PercentileFunc {
                                                this: column.clone(),
                                                percentile: quantile,
                                                order_by: None,
                                                filter: None,
                                            }));
                                            Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                                                this: inner,
                                                order_by: vec![crate::expressions::Ordered {
                                                    this: column,
                                                    desc: false,
                                                    nulls_first: None,
                                                    explicit_asc: false,
                                                    with_fill: None,
                                                }],
                                            })))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile])))),
                                    }
                                }
                                // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                                // (mirrors QUANTILE_CONT above, with the discrete percentile node)
                                "QUANTILE_DISC" if f.args.len() == 2 => {
                                    let mut args = f.args;
                                    let column = args.remove(0);
                                    let quantile = args.remove(0);
                                    match target {
                                        DialectType::DuckDB => {
                                            Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile]))))
                                        }
                                        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
                                            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
                                            let inner = Expression::PercentileDisc(Box::new(crate::expressions::PercentileFunc {
                                                this: column.clone(),
                                                percentile: quantile,
                                                order_by: None,
                                                filter: None,
                                            }));
                                            Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                                                this: inner,
                                                order_by: vec![crate::expressions::Ordered {
                                                    this: column,
                                                    desc: false,
                                                    nulls_first: None,
                                                    explicit_asc: false,
                                                    with_fill: None,
                                                }],
                                            })))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile])))),
                                    }
                                }
                                // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
                                "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
                                    let name = match target {
                                        DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE_APPROX",
                                        DialectType::DuckDB => "APPROX_QUANTILE",
                                        DialectType::PostgreSQL | DialectType::Redshift => "PERCENTILE_CONT",
                                        // Other targets keep the original (non-upper-cased) name.
                                        _ => &f.name,
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
                                "EPOCH" if f.args.len() == 1 => {
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
                                        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
                                        _ => "EPOCH",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                                }
                                // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
                                "EPOCH_MS" if f.args.len() == 1 => {
                                    match target {
                                        DialectType::Spark | DialectType::Databricks => {
                                            // Spark has a dedicated milliseconds constructor.
                                            Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), f.args))))
                                        }
                                        DialectType::Hive => {
                                            // Hive: FROM_UNIXTIME(x / 1000)
                                            let arg = f.args.into_iter().next().unwrap();
                                            let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                                arg,
                                                Expression::number(1000),
                                            )));
                                            Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div_expr]))))
                                        }
                                        DialectType::Presto | DialectType::Trino => {
                                            // Presto/Trino: FROM_UNIXTIME(x / 1000) as well.
                                            Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(),
                                                vec![Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                                    f.args.into_iter().next().unwrap(),
                                                    Expression::number(1000),
                                                )))]
                                            ))))
                                        }
                                        _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), f.args)))),
                                    }
                                }
                                // HASHBYTES('algorithm', x) -> target-specific hash function
                                "HASHBYTES" if f.args.len() == 2 => {
                                    // Keep HASHBYTES as-is for TSQL target
                                    if matches!(target, DialectType::TSQL) {
                                        return Ok(Expression::Function(f));
                                    }
                                    // Algorithm must be a string literal; anything else is passed through.
                                    let algo_expr = &f.args[0];
                                    let algo = match algo_expr {
                                        Expression::Literal(crate::expressions::Literal::String(s)) => s.to_uppercase(),
                                        _ => return Ok(Expression::Function(f)),
                                    };
                                    let data_arg = f.args.into_iter().nth(1).unwrap();
                                    match algo.as_str() {
                                        "SHA1" => {
                                            let name = match target {
                                                DialectType::Spark | DialectType::Databricks => "SHA",
                                                DialectType::Hive => "SHA1",
                                                _ => "SHA1",
                                            };
                                            Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![data_arg]))))
                                        }
                                        "SHA2_256" => {
                                            Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(256)]))))
                                        }
                                        "SHA2_512" => {
                                            Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(512)]))))
                                        }
                                        "MD5" => {
                                            Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![data_arg]))))
                                        }
                                        // NOTE(review): the fallback re-emits the algorithm string
                                        // upper-cased, not the original literal - confirm intended.
                                        _ => Ok(Expression::Function(Box::new(Function::new("HASHBYTES".to_string(), vec![Expression::string(&algo), data_arg])))),
                                    }
                                }
                                // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
                                "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
                                    // The _TEXT variant returns a scalar string instead of JSON.
                                    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
                                    let mut args = f.args;
                                    let json_expr = args.remove(0);
                                    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
                                    // NOTE(review): keys are inserted unquoted, so keys containing
                                    // '.', spaces, or brackets produce an invalid path - confirm
                                    // callers only pass simple keys.
                                    let mut json_path = "$".to_string();
                                    for a in &args {
                                        match a {
                                            Expression::Literal(crate::expressions::Literal::String(s)) => {
                                                // Numeric string keys become array indices: [0]
                                                if s.chars().all(|c| c.is_ascii_digit()) {
                                                    json_path.push('[');
                                                    json_path.push_str(s);
                                                    json_path.push(']');
                                                } else {
                                                    json_path.push('.');
                                                    json_path.push_str(s);
                                                }
                                            }
                                            _ => {
                                                // Non-literal key: emit a wildcard-ish placeholder.
                                                json_path.push_str(".?");
                                            }
                                        }
                                    }
                                    match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                            Ok(Expression::Function(Box::new(Function::new(
                                                "GET_JSON_OBJECT".to_string(),
                                                vec![json_expr, Expression::string(&json_path)],
                                            ))))
                                        }
                                        // NOTE(review): this arm and the BigQuery/MySQL arm below
                                        // are identical and could be merged into one pattern.
                                        DialectType::Presto | DialectType::Trino => {
                                            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                                            Ok(Expression::Function(Box::new(Function::new(
                                                func_name.to_string(),
                                                vec![json_expr, Expression::string(&json_path)],
                                            ))))
                                        }
                                        DialectType::BigQuery | DialectType::MySQL => {
                                            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                                            Ok(Expression::Function(Box::new(Function::new(
                                                func_name.to_string(),
                                                vec![json_expr, Expression::string(&json_path)],
                                            ))))
                                        }
                                        DialectType::PostgreSQL | DialectType::Materialize => {
                                            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
                                            // (these take the keys as separate varargs, not a path string)
                                            let func_name = if is_text { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
                                            let mut new_args = vec![json_expr];
                                            new_args.extend(args);
                                            Ok(Expression::Function(Box::new(Function::new(
                                                func_name.to_string(),
                                                new_args,
                                            ))))
                                        }
                                        DialectType::DuckDB | DialectType::SQLite => {
                                            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
                                            if is_text {
                                                Ok(Expression::JsonExtractScalar(Box::new(crate::expressions::JsonExtractFunc {
                                                    this: json_expr,
                                                    path: Expression::string(&json_path),
                                                    returning: None,
                                                    arrow_syntax: true,
                                                    hash_arrow_syntax: false,
                                                    wrapper_option: None,
                                                    quotes_option: None,
                                                    on_scalar_string: false,
                                                    on_error: None,
                                                })))
                                            } else {
                                                Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                                                    this: json_expr,
                                                    path: Expression::string(&json_path),
                                                    returning: None,
                                                    arrow_syntax: true,
                                                    hash_arrow_syntax: false,
                                                    wrapper_option: None,
                                                    quotes_option: None,
                                                    on_scalar_string: false,
                                                    on_error: None,
                                                })))
                                            }
                                        }
                                        DialectType::Redshift => {
                                            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
                                            let mut new_args = vec![json_expr];
                                            new_args.extend(args);
                                            Ok(Expression::Function(Box::new(Function::new(
                                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                                new_args,
                                            ))))
                                        }
                                        DialectType::TSQL => {
                                            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
                                            // JSON_QUERY handles objects/arrays, JSON_VALUE scalars;
                                            // ISNULL combines them to cover both cases.
                                            let jq = Expression::Function(Box::new(Function::new(
                                                "JSON_QUERY".to_string(),
                                                vec![json_expr.clone(), Expression::string(&json_path)],
                                            )));
                                            let jv = Expression::Function(Box::new(Function::new(
                                                "JSON_VALUE".to_string(),
                                                vec![json_expr, Expression::string(&json_path)],
                                            )));
                                            Ok(Expression::Function(Box::new(Function::new(
                                                "ISNULL".to_string(),
                                                vec![jq, jv],
                                            ))))
                                        }
                                        DialectType::ClickHouse => {
                                            // ClickHouse takes the keys as separate varargs.
                                            let func_name = if is_text { "JSONExtractString" } else { "JSONExtractRaw" };
                                            let mut new_args = vec![json_expr];
                                            new_args.extend(args);
                                            Ok(Expression::Function(Box::new(Function::new(
                                                func_name.to_string(),
                                                new_args,
                                            ))))
                                        }
                                        _ => {
                                            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                                            Ok(Expression::Function(Box::new(Function::new(
                                                func_name.to_string(),
                                                vec![json_expr, Expression::string(&json_path)],
                                            ))))
                                        }
                                    }
                                }
                                // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
                                "APPROX_DISTINCT" if f.args.len() >= 1 => {
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
                                        _ => "APPROX_DISTINCT",
                                    };
                                    let mut args = f.args;
                                    // Hive doesn't support the accuracy parameter
                                    if name == "APPROX_COUNT_DISTINCT" && matches!(target, DialectType::Hive) {
                                        args.truncate(1);
                                    }
                                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
                                }
                                // REGEXP_EXTRACT(x, pattern) - normalize default group index
                                "REGEXP_EXTRACT" if f.args.len() == 2 => {
                                    // Determine source default group index
                                    // (group 0 = whole match; group 1 = first capture group)
                                    let source_default = match source {
                                        DialectType::Presto | DialectType::Trino | DialectType::DuckDB => 0,
                                        _ => 1, // Hive/Spark/Databricks default = 1
                                    };
                                    // Determine target default group index
                                    let target_default = match target {
                                        DialectType::Presto | DialectType::Trino | DialectType::DuckDB
                                            | DialectType::BigQuery => 0,
                                        DialectType::Snowflake => {
                                            // Snowflake uses REGEXP_SUBSTR
                                            return Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), f.args))));
                                        }
                                        _ => 1, // Hive/Spark/Databricks default = 1
                                    };
                                    if source_default != target_default {
                                        // Pin the source's implicit group index explicitly so the
                                        // target's different default does not change semantics.
                                        let mut args = f.args;
                                        args.push(Expression::number(source_default));
                                        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
                                    } else {
                                        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), f.args))))
                                    }
                                }
                                // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
                                "RLIKE" if f.args.len() == 2 => {
                                    let mut args = f.args;
                                    let str_expr = args.remove(0);
                                    let pattern = args.remove(0);
                                    match target {
                                        DialectType::DuckDB => {
                                            // REGEXP_MATCHES(str, pattern)
                                            Ok(Expression::Function(Box::new(Function::new(
                                                "REGEXP_MATCHES".to_string(),
                                                vec![str_expr, pattern],
                                            ))))
                                        }
                                        _ => {
                                            // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
                                            Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
                                                this: str_expr,
                                                pattern,
                                                flags: None,
                                            })))
                                        }
                                    }
                                }
6347 // EOMONTH(date[, month_offset]) -> target-specific
6348 "EOMONTH" if f.args.len() >= 1 => {
6349 let mut args = f.args;
6350 let date_arg = args.remove(0);
6351 let month_offset = if !args.is_empty() { Some(args.remove(0)) } else { None };
6352
6353 // Helper: wrap date in CAST to DATE
6354 let cast_to_date = |e: Expression| -> Expression {
6355 Expression::Cast(Box::new(Cast {
6356 this: e,
6357 to: DataType::Date,
6358 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6359 }))
6360 };
6361
6362 match target {
6363 DialectType::TSQL | DialectType::Fabric => {
6364 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
6365 let date = cast_to_date(date_arg);
6366 let date = if let Some(offset) = month_offset {
6367 Expression::Function(Box::new(Function::new(
6368 "DATEADD".to_string(), vec![
6369 Expression::Identifier(Identifier::new("MONTH")),
6370 offset, date,
6371 ],
6372 )))
6373 } else {
6374 date
6375 };
6376 Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![date]))))
6377 }
6378 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
6379 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
6380 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
6381 let cast_ts = Expression::Cast(Box::new(Cast {
6382 this: date_arg,
6383 to: DataType::Timestamp { timezone: false, precision: None },
6384 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6385 }));
6386 let date = cast_to_date(cast_ts);
6387 let date = if let Some(offset) = month_offset {
6388 Expression::Function(Box::new(Function::new(
6389 "DATE_ADD".to_string(), vec![
6390 Expression::string("MONTH"),
6391 offset, date,
6392 ],
6393 )))
6394 } else {
6395 date
6396 };
6397 Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![date]))))
6398 }
6399 DialectType::PostgreSQL => {
6400 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
6401 let date = cast_to_date(date_arg);
6402 let date = if let Some(offset) = month_offset {
6403 let interval_str = format!("{} MONTH", Self::expr_to_string_static(&offset));
6404 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6405 date,
6406 Expression::Interval(Box::new(crate::expressions::Interval {
6407 this: Some(Expression::string(&interval_str)),
6408 unit: None,
6409 })),
6410 )))
6411 } else {
6412 date
6413 };
6414 let truncated = Expression::Function(Box::new(Function::new(
6415 "DATE_TRUNC".to_string(), vec![Expression::string("MONTH"), date],
6416 )));
6417 let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6418 truncated,
6419 Expression::Interval(Box::new(crate::expressions::Interval {
6420 this: Some(Expression::string("1 MONTH")),
6421 unit: None,
6422 })),
6423 )));
6424 let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
6425 plus_month,
6426 Expression::Interval(Box::new(crate::expressions::Interval {
6427 this: Some(Expression::string("1 DAY")),
6428 unit: None,
6429 })),
6430 )));
6431 Ok(Expression::Cast(Box::new(Cast {
6432 this: minus_day,
6433 to: DataType::Date,
6434 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6435 })))
6436 }
6437 DialectType::DuckDB => {
6438 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
6439 let date = cast_to_date(date_arg);
6440 let date = if let Some(offset) = month_offset {
6441 // Wrap negative numbers in parentheses for DuckDB INTERVAL
6442 let interval_val = if matches!(&offset, Expression::Neg(_)) {
6443 Expression::Paren(Box::new(crate::expressions::Paren {
6444 this: offset,
6445 trailing_comments: Vec::new(),
6446 }))
6447 } else {
6448 offset
6449 };
6450 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6451 date,
6452 Expression::Interval(Box::new(crate::expressions::Interval {
6453 this: Some(interval_val),
6454 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
6455 unit: crate::expressions::IntervalUnit::Month,
6456 use_plural: false,
6457 }),
6458 })),
6459 )))
6460 } else {
6461 date
6462 };
6463 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6464 }
6465 DialectType::Snowflake | DialectType::Redshift => {
6466 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
6467 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
6468 let date = if matches!(target, DialectType::Snowflake) {
6469 Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])))
6470 } else {
6471 cast_to_date(date_arg)
6472 };
6473 let date = if let Some(offset) = month_offset {
6474 Expression::Function(Box::new(Function::new(
6475 "DATEADD".to_string(), vec![
6476 Expression::Identifier(Identifier::new("MONTH")),
6477 offset, date,
6478 ],
6479 )))
6480 } else {
6481 date
6482 };
6483 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6484 }
6485 DialectType::Spark | DialectType::Databricks => {
6486 // Spark: LAST_DAY(TO_DATE(date))
6487 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
6488 let date = Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])));
6489 let date = if let Some(offset) = month_offset {
6490 Expression::Function(Box::new(Function::new(
6491 "ADD_MONTHS".to_string(), vec![date, offset],
6492 )))
6493 } else {
6494 date
6495 };
6496 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6497 }
6498 DialectType::MySQL => {
6499 // MySQL: LAST_DAY(DATE(date)) - no offset
6500 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
6501 let date = if let Some(offset) = month_offset {
6502 let iu = crate::expressions::IntervalUnit::Month;
6503 Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
6504 this: date_arg,
6505 interval: offset,
6506 unit: iu,
6507 }))
6508 } else {
6509 Expression::Function(Box::new(Function::new("DATE".to_string(), vec![date_arg])))
6510 };
6511 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6512 }
6513 DialectType::BigQuery => {
6514 // BigQuery: LAST_DAY(CAST(date AS DATE))
6515 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
6516 let date = cast_to_date(date_arg);
6517 let date = if let Some(offset) = month_offset {
6518 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
6519 this: Some(offset),
6520 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
6521 unit: crate::expressions::IntervalUnit::Month,
6522 use_plural: false,
6523 }),
6524 }));
6525 Expression::Function(Box::new(Function::new(
6526 "DATE_ADD".to_string(), vec![date, interval],
6527 )))
6528 } else {
6529 date
6530 };
6531 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6532 }
6533 DialectType::ClickHouse => {
6534 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
6535 let date = Expression::Cast(Box::new(Cast {
6536 this: date_arg,
6537 to: DataType::Custom { name: "Nullable(DATE)".to_string() },
6538 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6539 }));
6540 let date = if let Some(offset) = month_offset {
6541 Expression::Function(Box::new(Function::new(
6542 "DATE_ADD".to_string(), vec![
6543 Expression::Identifier(Identifier::new("MONTH")),
6544 offset, date,
6545 ],
6546 )))
6547 } else {
6548 date
6549 };
6550 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6551 }
6552 DialectType::Hive => {
6553 // Hive: LAST_DAY(date)
6554 let date = if let Some(offset) = month_offset {
6555 Expression::Function(Box::new(Function::new(
6556 "ADD_MONTHS".to_string(), vec![date_arg, offset],
6557 )))
6558 } else {
6559 date_arg
6560 };
6561 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6562 }
6563 _ => {
6564 // Default: LAST_DAY(date)
6565 let date = if let Some(offset) = month_offset {
6566 let unit = Expression::Identifier(Identifier::new("MONTH"));
6567 Expression::Function(Box::new(Function::new(
6568 "DATEADD".to_string(), vec![unit, offset, date_arg],
6569 )))
6570 } else {
6571 date_arg
6572 };
6573 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6574 }
6575 }
6576 }
6577 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
6578 "LAST_DAY" | "LAST_DAY_OF_MONTH" if !matches!(source, DialectType::BigQuery) && f.args.len() >= 1 => {
6579 let first_arg = f.args.into_iter().next().unwrap();
6580 match target {
6581 DialectType::TSQL | DialectType::Fabric => Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![first_arg])))),
6582 DialectType::Presto | DialectType::Trino | DialectType::Athena => Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![first_arg])))),
6583 _ => Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![first_arg])))),
6584 }
6585 }
6586 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
6587 "MAP" if f.args.len() == 2
6588 && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6589 let keys_arg = f.args[0].clone();
6590 let vals_arg = f.args[1].clone();
6591
6592 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
6593 fn extract_array_elements(expr: &Expression) -> Option<&Vec<Expression>> {
6594 match expr {
6595 Expression::Array(arr) => Some(&arr.expressions),
6596 Expression::ArrayFunc(arr) => Some(&arr.expressions),
6597 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => Some(&f.args),
6598 _ => None,
6599 }
6600 }
6601
6602 match target {
6603 DialectType::Spark | DialectType::Databricks => {
6604 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
6605 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
6606 }
6607 DialectType::Hive => {
6608 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
6609 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6610 if keys.len() == vals.len() {
6611 let mut interleaved = Vec::new();
6612 for (k, v) in keys.iter().zip(vals.iter()) {
6613 interleaved.push(k.clone());
6614 interleaved.push(v.clone());
6615 }
6616 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), interleaved))))
6617 } else {
6618 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6619 }
6620 } else {
6621 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6622 }
6623 }
6624 DialectType::Snowflake => {
6625 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
6626 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6627 if keys.len() == vals.len() {
6628 let mut interleaved = Vec::new();
6629 for (k, v) in keys.iter().zip(vals.iter()) {
6630 interleaved.push(k.clone());
6631 interleaved.push(v.clone());
6632 }
6633 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), interleaved))))
6634 } else {
6635 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6636 }
6637 } else {
6638 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6639 }
6640 }
6641 _ => Ok(Expression::Function(f)),
6642 }
6643 }
6644 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
6645 "MAP" if f.args.is_empty()
6646 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks)
6647 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6648 let empty_keys = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6649 let empty_vals = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6650 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![empty_keys, empty_vals]))))
6651 }
6652 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
6653 "MAP" if f.args.len() >= 2 && f.args.len() % 2 == 0
6654 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::ClickHouse) => {
6655 let args = f.args;
6656 match target {
6657 DialectType::DuckDB => {
6658 // MAP([k1, k2], [v1, v2])
6659 let mut keys = Vec::new();
6660 let mut vals = Vec::new();
6661 for (i, arg) in args.into_iter().enumerate() {
6662 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6663 }
6664 let keys_arr = Expression::Array(Box::new(crate::expressions::Array {
6665 expressions: keys,
6666 }));
6667 let vals_arr = Expression::Array(Box::new(crate::expressions::Array {
6668 expressions: vals,
6669 }));
6670 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6671 }
6672 DialectType::Presto | DialectType::Trino => {
6673 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
6674 let mut keys = Vec::new();
6675 let mut vals = Vec::new();
6676 for (i, arg) in args.into_iter().enumerate() {
6677 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6678 }
6679 let keys_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: keys }));
6680 let vals_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: vals }));
6681 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6682 }
6683 DialectType::Snowflake => {
6684 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), args))))
6685 }
6686 DialectType::ClickHouse => {
6687 Ok(Expression::Function(Box::new(Function::new("map".to_string(), args))))
6688 }
6689 _ => Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), args)))),
6690 }
6691 }
6692 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
6693 "COLLECT_LIST" if f.args.len() >= 1 => {
6694 let name = match target {
6695 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_LIST",
6696 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift
6697 | DialectType::Snowflake | DialectType::BigQuery => "ARRAY_AGG",
6698 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
6699 _ => "ARRAY_AGG",
6700 };
6701 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6702 }
6703 // COLLECT_SET(x) -> target-specific distinct array aggregation
6704 "COLLECT_SET" if f.args.len() >= 1 => {
6705 let name = match target {
6706 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_SET",
6707 DialectType::Presto | DialectType::Trino | DialectType::Athena => "SET_AGG",
6708 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
6709 _ => "ARRAY_AGG",
6710 };
6711 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6712 }
6713 // ISNAN(x) / IS_NAN(x) - normalize
6714 "ISNAN" | "IS_NAN" => {
6715 let name = match target {
6716 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "ISNAN",
6717 DialectType::Presto | DialectType::Trino | DialectType::Athena => "IS_NAN",
6718 DialectType::BigQuery | DialectType::PostgreSQL | DialectType::Redshift => "IS_NAN",
6719 DialectType::ClickHouse => "IS_NAN",
6720 _ => "ISNAN",
6721 };
6722 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6723 }
6724 // SPLIT_PART(str, delim, index) -> target-specific
6725 "SPLIT_PART" if f.args.len() == 3 => {
6726 match target {
6727 DialectType::Spark | DialectType::Databricks => {
6728 // Keep as SPLIT_PART (Spark 3.4+)
6729 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6730 }
6731 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Snowflake
6732 | DialectType::Redshift | DialectType::Trino | DialectType::Presto => {
6733 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6734 }
6735 DialectType::Hive => {
6736 // SPLIT(str, delim)[index]
6737 // Complex conversion, just keep as-is for now
6738 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6739 }
6740 _ => Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args)))),
6741 }
6742 }
6743 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
6744 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
6745 let is_scalar = name == "JSON_EXTRACT_SCALAR";
6746 match target {
6747 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6748 let mut args = f.args;
6749 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
6750 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
6751 if let Some(Expression::Function(inner)) = args.first() {
6752 if inner.name.eq_ignore_ascii_case("TRY") && inner.args.len() == 1 {
6753 let mut inner_args = inner.args.clone();
6754 args[0] = inner_args.remove(0);
6755 }
6756 }
6757 Ok(Expression::Function(Box::new(Function::new(
6758 "GET_JSON_OBJECT".to_string(),
6759 args,
6760 ))))
6761 }
6762 DialectType::DuckDB | DialectType::SQLite => {
6763 // json -> path syntax
6764 let mut args = f.args;
6765 let json_expr = args.remove(0);
6766 let path = args.remove(0);
6767 Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
6768 this: json_expr,
6769 path,
6770 returning: None,
6771 arrow_syntax: true,
6772 hash_arrow_syntax: false,
6773 wrapper_option: None,
6774 quotes_option: None,
6775 on_scalar_string: false,
6776 on_error: None,
6777 })))
6778 }
6779 DialectType::TSQL => {
6780 let func_name = if is_scalar { "JSON_VALUE" } else { "JSON_QUERY" };
6781 Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
6782 }
6783 DialectType::PostgreSQL | DialectType::Redshift => {
6784 let func_name = if is_scalar { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
6785 Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
6786 }
6787 _ => {
6788 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6789 }
6790 }
6791 }
6792 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
6793 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
6794 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON" if f.args.len() >= 2
6795 && matches!(source, DialectType::SingleStore) => {
6796 let is_bson = name == "BSON_EXTRACT_BSON";
6797 let mut args = f.args;
6798 let json_expr = args.remove(0);
6799
6800 // Build JSONPath from remaining arguments
6801 let mut path = String::from("$");
6802 for arg in &args {
6803 if let Expression::Literal(crate::expressions::Literal::String(s)) = arg {
6804 // Check if it's a numeric string (array index)
6805 if s.parse::<i64>().is_ok() {
6806 path.push('[');
6807 path.push_str(s);
6808 path.push(']');
6809 } else {
6810 path.push('.');
6811 path.push_str(s);
6812 }
6813 }
6814 }
6815
6816 let target_func = if is_bson { "JSONB_EXTRACT" } else { "JSON_EXTRACT" };
6817 Ok(Expression::Function(Box::new(Function::new(
6818 target_func.to_string(),
6819 vec![json_expr, Expression::string(&path)],
6820 ))))
6821 }
6822 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
6823 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
6824 Ok(Expression::Function(Box::new(Function {
6825 name: "arraySum".to_string(),
6826 args: f.args,
6827 distinct: f.distinct,
6828 trailing_comments: f.trailing_comments,
6829 use_bracket_syntax: f.use_bracket_syntax,
6830 no_parens: f.no_parens,
6831 quoted: f.quoted,
6832 })))
6833 }
6834 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
6835 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
6836 // and is handled by JsonQueryValueConvert action. This handles the case where
6837 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
6838 "JSON_QUERY" | "JSON_VALUE" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
6839 match target {
6840 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6841 Ok(Expression::Function(Box::new(Function::new(
6842 "GET_JSON_OBJECT".to_string(),
6843 f.args,
6844 ))))
6845 }
6846 _ => Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args)))),
6847 }
6848 }
6849 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
6850 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
6851 let arg = f.args.into_iter().next().unwrap();
6852 let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);
6853 match target {
6854 DialectType::DuckDB if is_hive_source => {
6855 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
6856 let strptime = Expression::Function(Box::new(Function::new(
6857 "STRPTIME".to_string(),
6858 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
6859 )));
6860 Ok(Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![strptime]))))
6861 }
6862 DialectType::Presto | DialectType::Trino if is_hive_source => {
6863 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
6864 let cast_varchar = Expression::Cast(Box::new(crate::expressions::Cast {
6865 this: arg.clone(),
6866 to: DataType::VarChar { length: None, parenthesized_length: false },
6867 trailing_comments: vec![],
6868 double_colon_syntax: false,
6869 format: None,
6870 default: None,
6871 }));
6872 let date_parse = Expression::Function(Box::new(Function::new(
6873 "DATE_PARSE".to_string(),
6874 vec![cast_varchar, Expression::string("%Y-%m-%d %T")],
6875 )));
6876 let try_expr = Expression::Function(Box::new(Function::new(
6877 "TRY".to_string(), vec![date_parse],
6878 )));
6879 let date_format = Expression::Function(Box::new(Function::new(
6880 "DATE_FORMAT".to_string(),
6881 vec![arg, Expression::string("%Y-%m-%d %T")],
6882 )));
6883 let parse_datetime = Expression::Function(Box::new(Function::new(
6884 "PARSE_DATETIME".to_string(),
6885 vec![date_format, Expression::string("yyyy-MM-dd HH:mm:ss")],
6886 )));
6887 let coalesce = Expression::Function(Box::new(Function::new(
6888 "COALESCE".to_string(), vec![try_expr, parse_datetime],
6889 )));
6890 Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![coalesce]))))
6891 }
6892 DialectType::Presto | DialectType::Trino => {
6893 Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![arg]))))
6894 }
6895 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), vec![arg])))),
6896 }
6897 }
6898 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
6899 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => {
6900 match target {
6901 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6902 Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
6903 }
6904 _ => Ok(Expression::Function(Box::new(Function::new("TO_UNIX_TIMESTAMP".to_string(), f.args)))),
6905 }
6906 }
6907 // CURDATE() -> CURRENT_DATE
6908 "CURDATE" => {
6909 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
6910 }
6911 // CURTIME() -> CURRENT_TIME
6912 "CURTIME" => {
6913 Ok(Expression::CurrentTime(crate::expressions::CurrentTime { precision: None }))
6914 }
6915 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
6916 "ARRAY_SORT" if f.args.len() >= 1 => {
6917 match target {
6918 DialectType::Hive => {
6919 let mut args = f.args;
6920 args.truncate(1); // Drop lambda comparator
6921 Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
6922 }
6923 _ => Ok(Expression::Function(f)),
6924 }
6925 }
6926 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive
6927 "SORT_ARRAY" if f.args.len() == 1 => {
6928 match target {
6929 DialectType::Hive => Ok(Expression::Function(f)),
6930 _ => {
6931 Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), f.args))))
6932 }
6933 }
6934 }
6935 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
6936 "SORT_ARRAY" if f.args.len() == 2 => {
6937 let is_desc = matches!(&f.args[1], Expression::Boolean(b) if !b.value);
6938 if is_desc {
6939 match target {
6940 DialectType::DuckDB => {
6941 Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
6942 }
6943 DialectType::Presto | DialectType::Trino => {
6944 let arr_arg = f.args.into_iter().next().unwrap();
6945 let a = Expression::Column(crate::expressions::Column {
6946 name: crate::expressions::Identifier::new("a"),
6947 table: None,
6948 join_mark: false,
6949 trailing_comments: Vec::new(),
6950 });
6951 let b = Expression::Column(crate::expressions::Column {
6952 name: crate::expressions::Identifier::new("b"),
6953 table: None,
6954 join_mark: false,
6955 trailing_comments: Vec::new(),
6956 });
6957 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
6958 operand: None,
6959 whens: vec![
6960 (Expression::Lt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
6961 Expression::Literal(Literal::Number("1".to_string()))),
6962 (Expression::Gt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
6963 Expression::Literal(Literal::Number("-1".to_string()))),
6964 ],
6965 else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
6966 }));
6967 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
6968 parameters: vec![
6969 crate::expressions::Identifier::new("a"),
6970 crate::expressions::Identifier::new("b"),
6971 ],
6972 body: case_expr,
6973 colon: false,
6974 parameter_types: Vec::new(),
6975 }));
6976 Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr_arg, lambda]))))
6977 }
6978 _ => Ok(Expression::Function(f))
6979 }
6980 } else {
6981 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
6982 match target {
6983 DialectType::Hive => Ok(Expression::Function(f)),
6984 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
6985 }
6986 }
6987 }
6988 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
6989 "LEFT" if f.args.len() == 2 => {
6990 match target {
6991 DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
6992 let x = f.args[0].clone();
6993 let n = f.args[1].clone();
6994 Ok(Expression::Function(Box::new(Function::new(
6995 "SUBSTRING".to_string(),
6996 vec![x, Expression::number(1), n],
6997 ))))
6998 }
6999 DialectType::Spark | DialectType::Databricks
7000 if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
7001 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
7002 let x = f.args[0].clone();
7003 let n = f.args[1].clone();
7004 let cast_x = Expression::Cast(Box::new(Cast {
7005 this: x,
7006 to: DataType::VarChar { length: None, parenthesized_length: false },
7007 double_colon_syntax: false,
7008 trailing_comments: Vec::new(),
7009 format: None,
7010 default: None,
7011 }));
7012 Ok(Expression::Function(Box::new(Function::new("LEFT".to_string(), vec![cast_x, n]))))
7013 }
7014 _ => Ok(Expression::Function(f)),
7015 }
7016 }
7017 "RIGHT" if f.args.len() == 2 => {
7018 match target {
7019 DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
7020 let x = f.args[0].clone();
7021 let n = f.args[1].clone();
7022 // SUBSTRING(x, LENGTH(x) - (n - 1))
7023 let len_x = Expression::Function(Box::new(Function::new(
7024 "LENGTH".to_string(),
7025 vec![x.clone()],
7026 )));
7027 let n_minus_1 = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
7028 n,
7029 Expression::number(1),
7030 )));
7031 let n_minus_1_paren = Expression::Paren(Box::new(crate::expressions::Paren {
7032 this: n_minus_1,
7033 trailing_comments: Vec::new(),
7034 }));
7035 let offset = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
7036 len_x,
7037 n_minus_1_paren,
7038 )));
7039 Ok(Expression::Function(Box::new(Function::new(
7040 "SUBSTRING".to_string(),
7041 vec![x, offset],
7042 ))))
7043 }
7044 DialectType::Spark | DialectType::Databricks
7045 if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
7046 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
7047 let x = f.args[0].clone();
7048 let n = f.args[1].clone();
7049 let cast_x = Expression::Cast(Box::new(Cast {
7050 this: x,
7051 to: DataType::VarChar { length: None, parenthesized_length: false },
7052 double_colon_syntax: false,
7053 trailing_comments: Vec::new(),
7054 format: None,
7055 default: None,
7056 }));
7057 Ok(Expression::Function(Box::new(Function::new("RIGHT".to_string(), vec![cast_x, n]))))
7058 }
7059 _ => Ok(Expression::Function(f)),
7060 }
7061 }
7062 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
7063 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
7064 match target {
7065 DialectType::Snowflake => {
7066 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
7067 }
7068 DialectType::Spark | DialectType::Databricks => {
7069 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
7070 }
7071 _ => {
7072 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
7073 }
7074 }
7075 }
7076 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
7077 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
7078 "LIKE" if f.args.len() >= 2 => {
7079 let (this, pattern) = if matches!(source, DialectType::SQLite) {
7080 // SQLite: LIKE(pattern, string) -> string LIKE pattern
7081 (f.args[1].clone(), f.args[0].clone())
7082 } else {
7083 // Standard: LIKE(string, pattern) -> string LIKE pattern
7084 (f.args[0].clone(), f.args[1].clone())
7085 };
7086 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7087 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7088 left: this,
7089 right: pattern,
7090 escape,
7091 quantifier: None,
7092 })))
7093 }
7094 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
7095 "ILIKE" if f.args.len() >= 2 => {
7096 let this = f.args[0].clone();
7097 let pattern = f.args[1].clone();
7098 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7099 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
7100 left: this,
7101 right: pattern,
7102 escape,
7103 quantifier: None,
7104 })))
7105 }
7106 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
7107 "CHAR" if f.args.len() == 1 => {
7108 match target {
7109 DialectType::MySQL | DialectType::SingleStore
7110 | DialectType::TSQL => Ok(Expression::Function(f)),
7111 _ => {
7112 Ok(Expression::Function(Box::new(Function::new("CHR".to_string(), f.args))))
7113 }
7114 }
7115 }
7116 // CONCAT(a, b) -> a || b for PostgreSQL
7117 "CONCAT" if f.args.len() == 2 && matches!(target, DialectType::PostgreSQL)
7118 && matches!(source, DialectType::ClickHouse | DialectType::MySQL) => {
7119 let mut args = f.args;
7120 let right = args.pop().unwrap();
7121 let left = args.pop().unwrap();
7122 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
7123 this: Box::new(left),
7124 expression: Box::new(right),
7125 safe: None,
7126 })))
7127 }
7128 // ARRAY_TO_STRING(arr, delim) -> target-specific
7129 "ARRAY_TO_STRING" if f.args.len() >= 2 => {
7130 match target {
7131 DialectType::Presto | DialectType::Trino => {
7132 Ok(Expression::Function(Box::new(Function::new("ARRAY_JOIN".to_string(), f.args))))
7133 }
7134 DialectType::TSQL => {
7135 Ok(Expression::Function(Box::new(Function::new("STRING_AGG".to_string(), f.args))))
7136 }
7137 _ => Ok(Expression::Function(f)),
7138 }
7139 }
7140 // ARRAY_CONCAT -> target-specific
7141 "ARRAY_CONCAT" if f.args.len() == 2 => {
7142 match target {
7143 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7144 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
7145 }
7146 DialectType::Snowflake => {
7147 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
7148 }
7149 DialectType::Redshift => {
7150 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
7151 }
7152 DialectType::PostgreSQL => {
7153 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
7154 }
7155 DialectType::DuckDB => {
7156 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
7157 }
7158 DialectType::Presto | DialectType::Trino => {
7159 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
7160 }
7161 _ => Ok(Expression::Function(f)),
7162 }
7163 }
7164 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
7165 "HAS" if f.args.len() == 2 => {
7166 match target {
7167 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7168 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
7169 }
7170 DialectType::Presto | DialectType::Trino => {
7171 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
7172 }
7173 _ => Ok(Expression::Function(f)),
7174 }
7175 }
7176 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
7177 "NVL" if f.args.len() > 2 => {
7178 Ok(Expression::Function(Box::new(Function::new("COALESCE".to_string(), f.args))))
7179 }
7180 // ISNULL(x) in MySQL -> (x IS NULL)
7181 "ISNULL" if f.args.len() == 1 && matches!(source, DialectType::MySQL) && matches!(target, DialectType::MySQL) => {
7182 let arg = f.args.into_iter().next().unwrap();
7183 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
7184 this: Expression::IsNull(Box::new(crate::expressions::IsNull {
7185 this: arg,
7186 not: false,
7187 postfix_form: false,
7188 })),
7189 trailing_comments: Vec::new(),
7190 })))
7191 }
// MONTHNAME(x) -> DATE_FORMAT(x, '%M') when the target dialect is MySQL
7193 "MONTHNAME" if f.args.len() == 1 && matches!(target, DialectType::MySQL) => {
7194 let arg = f.args.into_iter().next().unwrap();
7195 Ok(Expression::Function(Box::new(Function::new(
7196 "DATE_FORMAT".to_string(),
7197 vec![arg, Expression::string("%M")],
7198 ))))
7199 }
7200 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
7201 "SPLITBYSTRING" if f.args.len() == 2 => {
7202 let sep = f.args[0].clone();
7203 let str_arg = f.args[1].clone();
7204 match target {
7205 DialectType::DuckDB => {
7206 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]))))
7207 }
7208 DialectType::Doris => {
7209 Ok(Expression::Function(Box::new(Function::new("SPLIT_BY_STRING".to_string(), vec![str_arg, sep]))))
7210 }
7211 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7212 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
7213 let escaped = Expression::Function(Box::new(Function::new(
7214 "CONCAT".to_string(),
7215 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
7216 )));
7217 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, escaped]))))
7218 }
7219 _ => Ok(Expression::Function(f)),
7220 }
7221 }
7222 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
7223 "SPLITBYREGEXP" if f.args.len() == 2 => {
7224 let sep = f.args[0].clone();
7225 let str_arg = f.args[1].clone();
7226 match target {
7227 DialectType::DuckDB => {
7228 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT_REGEX".to_string(), vec![str_arg, sep]))))
7229 }
7230 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7231 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, sep]))))
7232 }
7233 _ => Ok(Expression::Function(f)),
7234 }
7235 }
7236 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
7237 "TOMONDAY" => {
7238 if f.args.len() == 1 {
7239 let arg = f.args.into_iter().next().unwrap();
7240 match target {
7241 DialectType::Doris => {
7242 Ok(Expression::Function(Box::new(Function::new(
7243 "DATE_TRUNC".to_string(),
7244 vec![arg, Expression::string("WEEK")],
7245 ))))
7246 }
7247 _ => {
7248 Ok(Expression::Function(Box::new(Function::new(
7249 "DATE_TRUNC".to_string(),
7250 vec![Expression::string("WEEK"), arg],
7251 ))))
7252 }
7253 }
7254 } else {
7255 Ok(Expression::Function(f))
7256 }
7257 }
7258 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
7259 "COLLECT_LIST" if f.args.len() == 1 => {
7260 match target {
7261 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7262 Ok(Expression::Function(f))
7263 }
7264 _ => {
7265 Ok(Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args))))
7266 }
7267 }
7268 }
7269 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
7270 "TO_CHAR" if f.args.len() == 1 && matches!(target, DialectType::Doris) => {
7271 let arg = f.args.into_iter().next().unwrap();
7272 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7273 this: arg,
7274 to: DataType::Custom { name: "STRING".to_string() },
7275 double_colon_syntax: false,
7276 trailing_comments: Vec::new(),
7277 format: None,
7278 default: None,
7279 })))
7280 }
7281 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
7282 "DBMS_RANDOM.VALUE" if f.args.is_empty() => {
7283 match target {
7284 DialectType::PostgreSQL => {
7285 Ok(Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![]))))
7286 }
7287 _ => Ok(Expression::Function(f)),
7288 }
7289 }
7290 // ClickHouse formatDateTime -> target-specific
7291 "FORMATDATETIME" if f.args.len() >= 2 => {
7292 match target {
7293 DialectType::MySQL => {
7294 Ok(Expression::Function(Box::new(Function::new("DATE_FORMAT".to_string(), f.args))))
7295 }
7296 _ => Ok(Expression::Function(f)),
7297 }
7298 }
7299 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
7300 "REPLICATE" if f.args.len() == 2 => {
7301 match target {
7302 DialectType::TSQL => Ok(Expression::Function(f)),
7303 _ => {
7304 Ok(Expression::Function(Box::new(Function::new("REPEAT".to_string(), f.args))))
7305 }
7306 }
7307 }
7308 // LEN(x) -> LENGTH(x) for non-TSQL targets
7309 // No CAST needed when arg is already a string literal
7310 "LEN" if f.args.len() == 1 => {
7311 match target {
7312 DialectType::TSQL => Ok(Expression::Function(f)),
7313 DialectType::Spark | DialectType::Databricks => {
7314 let arg = f.args.into_iter().next().unwrap();
7315 // Don't wrap string literals with CAST - they're already strings
7316 let is_string = matches!(&arg, Expression::Literal(crate::expressions::Literal::String(_)));
7317 let final_arg = if is_string {
7318 arg
7319 } else {
7320 Expression::Cast(Box::new(Cast {
7321 this: arg,
7322 to: DataType::VarChar { length: None, parenthesized_length: false },
7323 double_colon_syntax: false,
7324 trailing_comments: Vec::new(),
7325 format: None,
7326 default: None,
7327 }))
7328 };
7329 Ok(Expression::Function(Box::new(Function::new(
7330 "LENGTH".to_string(),
7331 vec![final_arg],
7332 ))))
7333 }
7334 _ => {
7335 let arg = f.args.into_iter().next().unwrap();
7336 Ok(Expression::Function(Box::new(Function::new(
7337 "LENGTH".to_string(),
7338 vec![arg],
7339 ))))
7340 }
7341 }
7342 }
7343 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
7344 "COUNT_BIG" if f.args.len() == 1 => {
7345 match target {
7346 DialectType::TSQL => Ok(Expression::Function(f)),
7347 _ => {
7348 Ok(Expression::Function(Box::new(Function::new("COUNT".to_string(), f.args))))
7349 }
7350 }
7351 }
7352 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
7353 "DATEFROMPARTS" if f.args.len() == 3 => {
7354 match target {
7355 DialectType::TSQL => Ok(Expression::Function(f)),
7356 _ => {
7357 Ok(Expression::Function(Box::new(Function::new("MAKE_DATE".to_string(), f.args))))
7358 }
7359 }
7360 }
7361 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
7362 "REGEXP_LIKE" if f.args.len() >= 2 => {
7363 let str_expr = f.args[0].clone();
7364 let pattern = f.args[1].clone();
7365 let flags = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7366 match target {
7367 DialectType::DuckDB => {
7368 let mut new_args = vec![str_expr, pattern];
7369 if let Some(fl) = flags {
7370 new_args.push(fl);
7371 }
7372 Ok(Expression::Function(Box::new(Function::new(
7373 "REGEXP_MATCHES".to_string(),
7374 new_args,
7375 ))))
7376 }
7377 _ => {
7378 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
7379 this: str_expr,
7380 pattern,
7381 flags,
7382 })))
7383 }
7384 }
7385 }
7386 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
7387 "ARRAYJOIN" if f.args.len() == 1 => {
7388 match target {
7389 DialectType::PostgreSQL => {
7390 Ok(Expression::Function(Box::new(Function::new("UNNEST".to_string(), f.args))))
7391 }
7392 _ => Ok(Expression::Function(f)),
7393 }
7394 }
7395 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
7396 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
7397 match target {
7398 DialectType::TSQL => Ok(Expression::Function(f)),
7399 DialectType::DuckDB => {
7400 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
7401 let mut args = f.args;
7402 let ms = args.pop().unwrap();
7403 let s = args.pop().unwrap();
7404 // s + (ms / 1000.0)
7405 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
7406 ms,
7407 Expression::Literal(crate::expressions::Literal::Number("1000.0".to_string())),
7408 )));
7409 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
7410 s,
7411 Expression::Paren(Box::new(Paren { this: ms_frac, trailing_comments: vec![] })),
7412 )));
7413 args.push(s_with_ms);
7414 Ok(Expression::Function(Box::new(Function::new("MAKE_TIMESTAMP".to_string(), args))))
7415 }
7416 DialectType::Snowflake => {
7417 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
7418 let mut args = f.args;
7419 let ms = args.pop().unwrap();
7420 // ms * 1000000
7421 let ns = Expression::Mul(Box::new(BinaryOp::new(
7422 ms,
7423 Expression::number(1000000),
7424 )));
7425 args.push(ns);
7426 Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
7427 }
7428 _ => {
7429 // Default: keep function name for other targets
7430 Ok(Expression::Function(Box::new(Function::new("DATETIMEFROMPARTS".to_string(), f.args))))
7431 }
7432 }
7433 }
7434 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
7435 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
7436 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
7437 let is_try = name == "TRY_CONVERT";
7438 let type_expr = f.args[0].clone();
7439 let value_expr = f.args[1].clone();
7440 let style = if f.args.len() >= 3 { Some(&f.args[2]) } else { None };
7441
7442 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
7443 if matches!(target, DialectType::TSQL) {
7444 let normalized_type = match &type_expr {
7445 Expression::DataType(dt) => {
7446 let new_dt = match dt {
7447 DataType::Int { .. } => DataType::Custom { name: "INTEGER".to_string() },
7448 _ => dt.clone(),
7449 };
7450 Expression::DataType(new_dt)
7451 }
7452 Expression::Identifier(id) => {
7453 let upper = id.name.to_uppercase();
7454 let normalized = match upper.as_str() {
7455 "INT" => "INTEGER",
7456 _ => &upper,
7457 };
7458 Expression::Identifier(crate::expressions::Identifier::new(normalized))
7459 }
7460 Expression::Column(col) => {
7461 let upper = col.name.name.to_uppercase();
7462 let normalized = match upper.as_str() {
7463 "INT" => "INTEGER",
7464 _ => &upper,
7465 };
7466 Expression::Identifier(crate::expressions::Identifier::new(normalized))
7467 }
7468 _ => type_expr.clone(),
7469 };
7470 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
7471 let mut new_args = vec![normalized_type, value_expr];
7472 if let Some(s) = style {
7473 new_args.push(s.clone());
7474 }
7475 return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), new_args))));
7476 }
7477
7478 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
7479 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
7480 match e {
7481 Expression::DataType(dt) => {
7482 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
7483 match dt {
7484 DataType::Custom { name } if name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(") => {
7485 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
7486 let inner = &name[name.find('(').unwrap()+1..name.len()-1];
7487 if inner.eq_ignore_ascii_case("MAX") {
7488 Some(DataType::Text)
7489 } else if let Ok(len) = inner.parse::<u32>() {
7490 if name.starts_with("NCHAR") {
7491 Some(DataType::Char { length: Some(len) })
7492 } else {
7493 Some(DataType::VarChar { length: Some(len), parenthesized_length: false })
7494 }
7495 } else {
7496 Some(dt.clone())
7497 }
7498 }
7499 DataType::Custom { name } if name == "NVARCHAR" => {
7500 Some(DataType::VarChar { length: None, parenthesized_length: false })
7501 }
7502 DataType::Custom { name } if name == "NCHAR" => {
7503 Some(DataType::Char { length: None })
7504 }
7505 DataType::Custom { name } if name == "NVARCHAR(MAX)" || name == "VARCHAR(MAX)" => {
7506 Some(DataType::Text)
7507 }
7508 _ => Some(dt.clone()),
7509 }
7510 }
7511 Expression::Identifier(id) => {
7512 let name = id.name.to_uppercase();
7513 match name.as_str() {
7514 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7515 "BIGINT" => Some(DataType::BigInt { length: None }),
7516 "SMALLINT" => Some(DataType::SmallInt { length: None }),
7517 "TINYINT" => Some(DataType::TinyInt { length: None }),
7518 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7519 "REAL" => Some(DataType::Float { precision: None, scale: None, real_spelling: true }),
7520 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7521 "DATE" => Some(DataType::Date),
7522 "BIT" => Some(DataType::Boolean),
7523 "TEXT" => Some(DataType::Text),
7524 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7525 "MONEY" => Some(DataType::Decimal { precision: Some(15), scale: Some(4) }),
7526 "SMALLMONEY" => Some(DataType::Decimal { precision: Some(6), scale: Some(4) }),
7527 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7528 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7529 "CHAR" => Some(DataType::Char { length: None }),
7530 "NCHAR" => Some(DataType::Char { length: None }),
7531 _ => Some(DataType::Custom { name }),
7532 }
7533 }
7534 Expression::Column(col) => {
7535 let name = col.name.name.to_uppercase();
7536 match name.as_str() {
7537 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7538 "BIGINT" => Some(DataType::BigInt { length: None }),
7539 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7540 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7541 "DATE" => Some(DataType::Date),
7542 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7543 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7544 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7545 "CHAR" => Some(DataType::Char { length: None }),
7546 "NCHAR" => Some(DataType::Char { length: None }),
7547 _ => Some(DataType::Custom { name }),
7548 }
7549 }
7550 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
7551 Expression::Function(f) => {
7552 let fname = f.name.to_uppercase();
7553 match fname.as_str() {
7554 "VARCHAR" | "NVARCHAR" => {
7555 let len = f.args.first().and_then(|a| {
7556 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7557 n.parse::<u32>().ok()
7558 } else if let Expression::Identifier(id) = a {
7559 if id.name.eq_ignore_ascii_case("MAX") { None } else { None }
7560 } else { None }
7561 });
7562 // Check for VARCHAR(MAX) -> TEXT
7563 let is_max = f.args.first().map_or(false, |a| {
7564 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
7565 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
7566 });
7567 if is_max {
7568 Some(DataType::Text)
7569 } else {
7570 Some(DataType::VarChar { length: len, parenthesized_length: false })
7571 }
7572 }
7573 "NCHAR" | "CHAR" => {
7574 let len = f.args.first().and_then(|a| {
7575 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7576 n.parse::<u32>().ok()
7577 } else { None }
7578 });
7579 Some(DataType::Char { length: len })
7580 }
7581 "NUMERIC" | "DECIMAL" => {
7582 let precision = f.args.first().and_then(|a| {
7583 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7584 n.parse::<u32>().ok()
7585 } else { None }
7586 });
7587 let scale = f.args.get(1).and_then(|a| {
7588 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7589 n.parse::<u32>().ok()
7590 } else { None }
7591 });
7592 Some(DataType::Decimal { precision, scale })
7593 }
7594 _ => None,
7595 }
7596 }
7597 _ => None,
7598 }
7599 }
7600
7601 if let Some(mut dt) = expr_to_datatype(&type_expr) {
7602 // For TSQL source: VARCHAR/CHAR without length defaults to 30
7603 let is_tsql_source = matches!(source, DialectType::TSQL | DialectType::Fabric);
7604 if is_tsql_source {
7605 match &dt {
7606 DataType::VarChar { length: None, .. } => {
7607 dt = DataType::VarChar { length: Some(30), parenthesized_length: false };
7608 }
7609 DataType::Char { length: None } => {
7610 dt = DataType::Char { length: Some(30) };
7611 }
7612 _ => {}
7613 }
7614 }
7615
7616 // Determine if this is a string type
7617 let is_string_type = matches!(dt, DataType::VarChar { .. } | DataType::Char { .. } | DataType::Text)
7618 || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
7619 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
7620 || name.starts_with("VARCHAR(") || name == "VARCHAR"
7621 || name == "STRING");
7622
7623 // Determine if this is a date/time type
7624 let is_datetime_type = matches!(dt, DataType::Timestamp { .. } | DataType::Date)
7625 || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
7626 || name == "DATETIME2" || name == "SMALLDATETIME");
7627
7628 // Check for date conversion with style
7629 if style.is_some() {
7630 let style_num = style.and_then(|s| {
7631 if let Expression::Literal(crate::expressions::Literal::Number(n)) = s {
7632 n.parse::<u32>().ok()
7633 } else { None }
7634 });
7635
7636 // TSQL CONVERT date styles (Java format)
7637 let format_str = style_num.and_then(|n| match n {
7638 101 => Some("MM/dd/yyyy"),
7639 102 => Some("yyyy.MM.dd"),
7640 103 => Some("dd/MM/yyyy"),
7641 104 => Some("dd.MM.yyyy"),
7642 105 => Some("dd-MM-yyyy"),
7643 108 => Some("HH:mm:ss"),
7644 110 => Some("MM-dd-yyyy"),
7645 112 => Some("yyyyMMdd"),
7646 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
7647 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
7648 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
7649 _ => None,
7650 });
7651
7652 // Non-string, non-datetime types with style: just CAST, ignore the style
7653 if !is_string_type && !is_datetime_type {
7654 let cast_expr = if is_try {
7655 Expression::TryCast(Box::new(crate::expressions::Cast {
7656 this: value_expr,
7657 to: dt,
7658 trailing_comments: Vec::new(),
7659 double_colon_syntax: false,
7660 format: None,
7661 default: None,
7662 }))
7663 } else {
7664 Expression::Cast(Box::new(crate::expressions::Cast {
7665 this: value_expr,
7666 to: dt,
7667 trailing_comments: Vec::new(),
7668 double_colon_syntax: false,
7669 format: None,
7670 default: None,
7671 }))
7672 };
7673 return Ok(cast_expr);
7674 }
7675
7676 if let Some(java_fmt) = format_str {
7677 let c_fmt = java_fmt
7678 .replace("yyyy", "%Y")
7679 .replace("MM", "%m")
7680 .replace("dd", "%d")
7681 .replace("HH", "%H")
7682 .replace("mm", "%M")
7683 .replace("ss", "%S")
7684 .replace("SSSSSS", "%f")
7685 .replace("SSS", "%f")
7686 .replace("'T'", "T");
7687
7688 // For datetime target types: style is the INPUT format for parsing strings -> dates
7689 if is_datetime_type {
7690 match target {
7691 DialectType::DuckDB => {
7692 return Ok(Expression::Function(Box::new(Function::new(
7693 "STRPTIME".to_string(),
7694 vec![value_expr, Expression::string(&c_fmt)],
7695 ))));
7696 }
7697 DialectType::Spark | DialectType::Databricks => {
7698 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
7699 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
7700 let func_name = if matches!(dt, DataType::Date) {
7701 "TO_DATE"
7702 } else {
7703 "TO_TIMESTAMP"
7704 };
7705 return Ok(Expression::Function(Box::new(Function::new(
7706 func_name.to_string(),
7707 vec![value_expr, Expression::string(java_fmt)],
7708 ))));
7709 }
7710 DialectType::Hive => {
7711 return Ok(Expression::Function(Box::new(Function::new(
7712 "TO_TIMESTAMP".to_string(),
7713 vec![value_expr, Expression::string(java_fmt)],
7714 ))));
7715 }
7716 _ => {
7717 return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7718 this: value_expr,
7719 to: dt,
7720 trailing_comments: Vec::new(),
7721 double_colon_syntax: false,
7722 format: None,
7723 default: None,
7724 })));
7725 }
7726 }
7727 }
7728
7729 // For string target types: style is the OUTPUT format for dates -> strings
7730 match target {
7731 DialectType::DuckDB => {
7732 Ok(Expression::Function(Box::new(Function::new(
7733 "STRPTIME".to_string(),
7734 vec![value_expr, Expression::string(&c_fmt)],
7735 ))))
7736 }
7737 DialectType::Spark | DialectType::Databricks => {
7738 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
7739 // Determine the target string type
7740 let string_dt = match &dt {
7741 DataType::VarChar { length: Some(l), .. } => DataType::VarChar { length: Some(*l), parenthesized_length: false },
7742 DataType::Text => DataType::Custom { name: "STRING".to_string() },
7743 _ => DataType::Custom { name: "STRING".to_string() },
7744 };
7745 let date_format_expr = Expression::Function(Box::new(Function::new(
7746 "DATE_FORMAT".to_string(),
7747 vec![value_expr, Expression::string(java_fmt)],
7748 )));
7749 let cast_expr = if is_try {
7750 Expression::TryCast(Box::new(crate::expressions::Cast {
7751 this: date_format_expr,
7752 to: string_dt,
7753 trailing_comments: Vec::new(),
7754 double_colon_syntax: false,
7755 format: None,
7756 default: None,
7757 }))
7758 } else {
7759 Expression::Cast(Box::new(crate::expressions::Cast {
7760 this: date_format_expr,
7761 to: string_dt,
7762 trailing_comments: Vec::new(),
7763 double_colon_syntax: false,
7764 format: None,
7765 default: None,
7766 }))
7767 };
7768 Ok(cast_expr)
7769 }
7770 DialectType::MySQL | DialectType::SingleStore => {
7771 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
7772 let mysql_fmt = java_fmt
7773 .replace("yyyy", "%Y")
7774 .replace("MM", "%m")
7775 .replace("dd", "%d")
7776 .replace("HH:mm:ss.SSSSSS", "%T")
7777 .replace("HH:mm:ss", "%T")
7778 .replace("HH", "%H")
7779 .replace("mm", "%i")
7780 .replace("ss", "%S");
7781 let date_format_expr = Expression::Function(Box::new(Function::new(
7782 "DATE_FORMAT".to_string(),
7783 vec![value_expr, Expression::string(&mysql_fmt)],
7784 )));
7785 // MySQL uses CHAR for string casts
7786 let mysql_dt = match &dt {
7787 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7788 _ => dt,
7789 };
7790 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7791 this: date_format_expr,
7792 to: mysql_dt,
7793 trailing_comments: Vec::new(),
7794 double_colon_syntax: false,
7795 format: None,
7796 default: None,
7797 })))
7798 }
7799 DialectType::Hive => {
7800 let func_name = "TO_TIMESTAMP";
7801 Ok(Expression::Function(Box::new(Function::new(
7802 func_name.to_string(),
7803 vec![value_expr, Expression::string(java_fmt)],
7804 ))))
7805 }
7806 _ => {
7807 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7808 this: value_expr,
7809 to: dt,
7810 trailing_comments: Vec::new(),
7811 double_colon_syntax: false,
7812 format: None,
7813 default: None,
7814 })))
7815 }
7816 }
7817 } else {
7818 // Unknown style, just CAST
7819 let cast_expr = if is_try {
7820 Expression::TryCast(Box::new(crate::expressions::Cast {
7821 this: value_expr,
7822 to: dt,
7823 trailing_comments: Vec::new(),
7824 double_colon_syntax: false,
7825 format: None,
7826 default: None,
7827 }))
7828 } else {
7829 Expression::Cast(Box::new(crate::expressions::Cast {
7830 this: value_expr,
7831 to: dt,
7832 trailing_comments: Vec::new(),
7833 double_colon_syntax: false,
7834 format: None,
7835 default: None,
7836 }))
7837 };
7838 Ok(cast_expr)
7839 }
7840 } else {
7841 // No style - simple CAST
7842 let final_dt = if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7843 match &dt {
7844 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => {
7845 DataType::Custom { name: "SIGNED".to_string() }
7846 }
7847 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7848 _ => dt,
7849 }
7850 } else {
7851 dt
7852 };
7853 let cast_expr = if is_try {
7854 Expression::TryCast(Box::new(crate::expressions::Cast {
7855 this: value_expr,
7856 to: final_dt,
7857 trailing_comments: Vec::new(),
7858 double_colon_syntax: false,
7859 format: None,
7860 default: None,
7861 }))
7862 } else {
7863 Expression::Cast(Box::new(crate::expressions::Cast {
7864 this: value_expr,
7865 to: final_dt,
7866 trailing_comments: Vec::new(),
7867 double_colon_syntax: false,
7868 format: None,
7869 default: None,
7870 }))
7871 };
7872 Ok(cast_expr)
7873 }
7874 } else {
7875 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
7876 Ok(Expression::Function(f))
7877 }
7878 }
7879 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
7880 "STRFTIME" if f.args.len() == 2 => {
7881 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
7882 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
7883 // SQLite: args[0] = format, args[1] = value
7884 (f.args[1].clone(), &f.args[0])
7885 } else {
7886 // DuckDB and others: args[0] = value, args[1] = format
7887 (f.args[0].clone(), &f.args[1])
7888 };
7889
7890 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string into a Java SimpleDateFormat
// pattern. Substitutions are applied sequentially in table order, so the
// relative ordering of overlapping specifiers (%-m after %m, %I after %-I,
// %F/%T last) is significant and must be preserved.
fn c_to_java_format(fmt: &str) -> String {
    const SUBS: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    SUBS.iter()
        .fold(fmt.to_string(), |acc, (c_spec, java)| acc.replace(c_spec, java))
}
7912
7913 // Helper: recursively convert format strings within expressions (handles CONCAT)
7914 fn convert_fmt_expr(expr: &Expression, converter: &dyn Fn(&str) -> String) -> Expression {
7915 match expr {
7916 Expression::Literal(crate::expressions::Literal::String(s)) => {
7917 Expression::string(&converter(s))
7918 }
7919 Expression::Function(func) if func.name.eq_ignore_ascii_case("CONCAT") => {
7920 let new_args: Vec<Expression> = func.args.iter()
7921 .map(|a| convert_fmt_expr(a, converter))
7922 .collect();
7923 Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args)))
7924 }
7925 other => other.clone(),
7926 }
7927 }
7928
7929 match target {
7930 DialectType::DuckDB => {
7931 if matches!(source, DialectType::SQLite) {
7932 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
7933 let cast_val = Expression::Cast(Box::new(Cast {
7934 this: val,
7935 to: crate::expressions::DataType::Timestamp { precision: None, timezone: false },
7936 trailing_comments: Vec::new(),
7937 double_colon_syntax: false,
7938 format: None,
7939 default: None,
7940 }));
7941 Ok(Expression::Function(Box::new(Function::new(
7942 "STRFTIME".to_string(),
7943 vec![cast_val, fmt_expr.clone()],
7944 ))))
7945 } else {
7946 Ok(Expression::Function(f))
7947 }
7948 }
7949 DialectType::Spark | DialectType::Databricks
7950 | DialectType::Hive => {
7951 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
7952 let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
7953 Ok(Expression::Function(Box::new(Function::new(
7954 "DATE_FORMAT".to_string(),
7955 vec![val, converted_fmt],
7956 ))))
7957 }
7958 DialectType::TSQL | DialectType::Fabric => {
7959 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
7960 let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
7961 Ok(Expression::Function(Box::new(Function::new(
7962 "FORMAT".to_string(),
7963 vec![val, converted_fmt],
7964 ))))
7965 }
7966 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
7967 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
7968 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
7969 let presto_fmt = duckdb_to_presto_format(s);
7970 Ok(Expression::Function(Box::new(Function::new(
7971 "DATE_FORMAT".to_string(),
7972 vec![val, Expression::string(&presto_fmt)],
7973 ))))
7974 } else {
7975 Ok(Expression::Function(Box::new(Function::new(
7976 "DATE_FORMAT".to_string(),
7977 vec![val, fmt_expr.clone()],
7978 ))))
7979 }
7980 }
7981 DialectType::BigQuery => {
7982 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
7983 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
7984 let bq_fmt = duckdb_to_bigquery_format(s);
7985 Ok(Expression::Function(Box::new(Function::new(
7986 "FORMAT_DATE".to_string(),
7987 vec![Expression::string(&bq_fmt), val],
7988 ))))
7989 } else {
7990 Ok(Expression::Function(Box::new(Function::new(
7991 "FORMAT_DATE".to_string(),
7992 vec![fmt_expr.clone(), val],
7993 ))))
7994 }
7995 }
7996 DialectType::PostgreSQL | DialectType::Redshift => {
7997 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
7998 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
7999 let pg_fmt = s
8000 .replace("%Y", "YYYY")
8001 .replace("%m", "MM")
8002 .replace("%d", "DD")
8003 .replace("%H", "HH24")
8004 .replace("%M", "MI")
8005 .replace("%S", "SS")
8006 .replace("%y", "YY")
8007 .replace("%-m", "FMMM")
8008 .replace("%-d", "FMDD")
8009 .replace("%-H", "FMHH24")
8010 .replace("%-I", "FMHH12")
8011 .replace("%p", "AM")
8012 .replace("%F", "YYYY-MM-DD")
8013 .replace("%T", "HH24:MI:SS");
8014 Ok(Expression::Function(Box::new(Function::new(
8015 "TO_CHAR".to_string(),
8016 vec![val, Expression::string(&pg_fmt)],
8017 ))))
8018 } else {
8019 Ok(Expression::Function(Box::new(Function::new(
8020 "TO_CHAR".to_string(),
8021 vec![val, fmt_expr.clone()],
8022 ))))
8023 }
8024 }
8025 _ => Ok(Expression::Function(f)),
8026 }
8027 }
// STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
// (the parsing counterpart of STRFTIME: string + format -> timestamp).
"STRPTIME" if f.args.len() == 2 => {
    // args[0]: the string to parse; args[1]: the strftime-style format.
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];

/// Translate a C/strftime-style parse format into the Java
/// `SimpleDateFormat`-style pattern understood by Spark/Hive parse
/// functions.
///
/// Substitutions are applied strictly in this order, so dash-padded
/// specifiers (e.g. "%-I") are rewritten before their plain
/// counterparts ("%I"), and composite shorthands ("%F", "%T") expand
/// after their components have already been handled.
fn c_to_java_format_parse(fmt: &str) -> String {
    const MAPPINGS: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    MAPPINGS
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}

    // Dispatch on the target dialect; each branch picks that dialect's
    // native parse function and converts the format string when it is a
    // literal (non-literal formats can only be passed through).
    match target {
        // DuckDB is the home dialect for STRPTIME: pass through untouched.
        DialectType::DuckDB => Ok(Expression::Function(f)),
        DialectType::Spark | DialectType::Databricks => {
            // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let java_fmt = c_to_java_format_parse(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![val, Expression::string(&java_fmt)],
                ))))
            } else {
                // Non-literal format: rename the function but keep the
                // format expression as-is.
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        DialectType::Hive => {
            // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let java_fmt = c_to_java_format_parse(s);
                let unix_ts = Expression::Function(Box::new(Function::new(
                    "UNIX_TIMESTAMP".to_string(),
                    vec![val, Expression::string(&java_fmt)],
                )));
                let from_unix = Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
                // The outer CAST turns FROM_UNIXTIME's string result back
                // into a TIMESTAMP value.
                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                    this: from_unix,
                    to: DataType::Timestamp { timezone: false, precision: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            } else {
                // A non-literal format cannot be converted to a Java
                // pattern statically, so leave the call unchanged.
                Ok(Expression::Function(f))
            }
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let presto_fmt = duckdb_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![val, Expression::string(&presto_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        DialectType::BigQuery => {
            // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let bq_fmt = duckdb_to_bigquery_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "PARSE_TIMESTAMP".to_string(),
                    vec![Expression::string(&bq_fmt), val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "PARSE_TIMESTAMP".to_string(),
                    vec![fmt_expr.clone(), val],
                ))))
            }
        }
        // Every other target keeps the original STRPTIME call unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
// formatting function; only fires when the source dialect is Presto-family.
"DATE_FORMAT" if f.args.len() >= 2
    && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];

    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![val, Expression::string(&normalized)],
                ))))
            } else {
                // Non-literal format: nothing to normalize.
                Ok(Expression::Function(f))
            }
        }
        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
            // Convert Presto C-style to Java-style format
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![val, Expression::string(&java_fmt)],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::DuckDB => {
            // Convert to STRFTIME(val, duckdb_fmt)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![val, Expression::string(&duckdb_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        DialectType::BigQuery => {
            // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT_DATE".to_string(),
                    vec![Expression::string(&bq_fmt), val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT_DATE".to_string(),
                    vec![fmt_expr.clone(), val],
                ))))
            }
        }
        // Other targets keep the Presto DATE_FORMAT call as-is.
        _ => Ok(Expression::Function(f)),
    }
}
// DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
"DATE_PARSE" if f.args.len() >= 2
    && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];

    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto -> Presto: normalize format
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![val, Expression::string(&normalized)],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::Hive => {
            // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
                    || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
                    // Default format carries no extra information: a plain
                    // CAST is simpler and equivalent.
                    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                        this: val,
                        to: DataType::Timestamp { timezone: false, precision: None },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Custom format: use TO_TIMESTAMP with a Java pattern.
                    let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_TIMESTAMP".to_string(),
                        vec![val, Expression::string(&java_fmt)],
                    ))))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::Spark | DialectType::Databricks => {
            // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![val, Expression::string(&java_fmt)],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::DuckDB => {
            // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![val, Expression::string(&duckdb_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        // Other targets keep DATE_PARSE unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
// (Hive spells these UNBASE64/BASE64; arguments pass through untouched).
"FROM_BASE64" if f.args.len() == 1
    && matches!(target, DialectType::Hive) => {
    Ok(Expression::Function(Box::new(Function::new("UNBASE64".to_string(), f.args))))
}
"TO_BASE64" if f.args.len() == 1
    && matches!(target, DialectType::Hive) => {
    Ok(Expression::Function(Box::new(Function::new("BASE64".to_string(), f.args))))
}
// FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
// (Spark's FROM_UNIXTIME returns a string; Presto's returns a timestamp,
// so the CAST restores the Presto semantics for downstream expressions).
"FROM_UNIXTIME" if f.args.len() == 1
    && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena)
    && matches!(target, DialectType::Spark | DialectType::Databricks) => {
    // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
    let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), f.args)));
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: from_unix,
        to: DataType::Timestamp { timezone: false, precision: None },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
    })))
}
// DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function.
// Skipped when the target itself understands DATE_FORMAT natively
// (Hive/Spark/Databricks/MySQL/SingleStore).
"DATE_FORMAT" if f.args.len() >= 2
    && !matches!(target, DialectType::Hive | DialectType::Spark
        | DialectType::Databricks
        | DialectType::MySQL | DialectType::SingleStore) => {
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];
    // Hive-family sources use Java SimpleDateFormat patterns; MySQL-family
    // sources already use C-style patterns.
    let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);

/// Translate a Java `SimpleDateFormat`-style pattern into a C
/// strftime-style pattern.
///
/// Pass 1 rewrites the multi-character tokens, longest first, so that
/// e.g. "yyyy" wins over "yy". Pass 2 walks the intermediate string
/// character by character to map the timezone tokens `z` -> `%Z`
/// (name) and `Z` -> `%z` (offset), skipping over already-emitted
/// `%x` specifiers so their letters are never re-interpreted.
// NOTE(review): "EEEE" maps to "%W" here, but strftime's full weekday
// name is "%A" ("%W" is week-of-year) — confirm this mapping is intended.
fn java_to_c_format(fmt: &str) -> String {
    const PASS_ONE: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let pass_one = PASS_ONE
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to));

    let mut out = String::with_capacity(pass_one.len());
    let mut it = pass_one.chars().peekable();
    while let Some(c) = it.next() {
        match c {
            // Copy an existing "%x" specifier verbatim so the character
            // after '%' is not mistaken for a timezone token. A lone
            // trailing '%' falls through to the catch-all below.
            '%' if it.peek().is_some() => {
                out.push('%');
                if let Some(next) = it.next() {
                    out.push(next);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}

    // Presto variant of the C-style pattern: collapse HH:MM:SS to %T.
    fn java_to_presto_format(fmt: &str) -> String {
        // Presto uses %T for HH:MM:SS
        let c_fmt = java_to_c_format(fmt);
        c_fmt.replace("%H:%M:%S", "%T")
    }

    // BigQuery variant: collapse full date/time runs into the %F/%T shorthands.
    fn java_to_bq_format(fmt: &str) -> String {
        // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
        let c_fmt = java_to_c_format(fmt);
        c_fmt.replace("%Y-%m-%d", "%F")
            .replace("%H:%M:%S", "%T")
    }

    // For Hive source, CAST string literals to appropriate type
    // (targets below are stricter than Hive about implicit string->date coercion).
    let cast_val = if is_hive_source {
        match &val {
            Expression::Literal(crate::expressions::Literal::String(_)) => {
                match target {
                    DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Self::ensure_cast_timestamp(val.clone())
                    }
                    DialectType::BigQuery => {
                        // BigQuery: CAST(val AS DATETIME)
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: val.clone(),
                            to: DataType::Custom { name: "DATETIME".to_string() },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    }
                    _ => val.clone(),
                }
            }
            // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
            Expression::Cast(c) if matches!(c.to, DataType::Date) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: val.clone(),
                    to: DataType::Timestamp { timezone: false, precision: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            Expression::Literal(crate::expressions::Literal::Date(_)) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
                let cast_date = Self::date_literal_to_cast(val.clone());
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: cast_date,
                    to: DataType::Timestamp { timezone: false, precision: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            _ => val.clone(),
        }
    } else {
        val.clone()
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(val, c_fmt); MySQL-style formats are already C-style.
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let c_fmt = if is_hive_source {
                    java_to_c_format(s)
                } else { s.clone() };
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![cast_val, Expression::string(&c_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![cast_val, fmt_expr.clone()],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            if is_hive_source {
                if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                    let p_fmt = java_to_presto_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![cast_val, Expression::string(&p_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![cast_val, fmt_expr.clone()],
                    ))))
                }
            } else {
                // MySQL-style source: Presto shares the C-style format, so
                // only the original args are forwarded (no cast, no rewrite).
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    f.args,
                ))))
            }
        }
        DialectType::BigQuery => {
            // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let bq_fmt = if is_hive_source {
                    java_to_bq_format(s)
                } else {
                    java_to_c_format(s)
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT_DATE".to_string(),
                    vec![Expression::string(&bq_fmt), cast_val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT_DATE".to_string(),
                    vec![fmt_expr.clone(), cast_val],
                ))))
            }
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // TO_CHAR with Oracle/Postgres-style tokens. Note ("MM","MM")
            // is a deliberate no-op: the month token is identical in both
            // pattern languages, kept for completeness of the table.
            // NOTE(review): this branch uses `val` (uncast) rather than
            // `cast_val` — presumably TO_CHAR coerces strings itself;
            // confirm this asymmetry with the other branches is intended.
            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                let pg_fmt = s
                    .replace("yyyy", "YYYY")
                    .replace("MM", "MM")
                    .replace("dd", "DD")
                    .replace("HH", "HH24")
                    .replace("mm", "MI")
                    .replace("ss", "SS")
                    .replace("yy", "YY");
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_CHAR".to_string(),
                    vec![val, Expression::string(&pg_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_CHAR".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        _ => Ok(Expression::Function(f)),
    }
}
// DATEDIFF(unit, start, end) - 3-arg form
// SQLite uses DATEDIFF(date1, date2, unit_string) instead
"DATEDIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    // Normalize both orders into (unit, start, end, unit_str); the SQLite
    // branch may early-return a fully built expression for SQLite targets.
    let (_arg0, arg1, arg2, unit_str) = if matches!(source, DialectType::SQLite) {
        let date1 = args.remove(0);
        let date2 = args.remove(0);
        let unit_expr = args.remove(0);
        let unit_s = Self::get_unit_str_static(&unit_expr);

        // For SQLite target, generate JULIANDAY arithmetic directly
        if matches!(target, DialectType::SQLite) {
            let jd_first = Expression::Function(Box::new(Function::new(
                "JULIANDAY".to_string(), vec![date1],
            )));
            let jd_second = Expression::Function(Box::new(Function::new(
                "JULIANDAY".to_string(), vec![date2],
            )));
            let diff = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(jd_first, jd_second)));
            // Parenthesize so the scale factor below binds to the whole difference.
            let paren_diff = Expression::Paren(Box::new(crate::expressions::Paren {
                this: diff, trailing_comments: Vec::new(),
            }));
            // JULIANDAY differences are in days; scale to the requested unit.
            // MONTH/YEAR use the 30/365-day approximations.
            let adjusted = match unit_s.as_str() {
                "HOUR" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                    paren_diff, Expression::Literal(Literal::Number("24.0".to_string())),
                ))),
                "MINUTE" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                    paren_diff, Expression::Literal(Literal::Number("1440.0".to_string())),
                ))),
                "SECOND" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                    paren_diff, Expression::Literal(Literal::Number("86400.0".to_string())),
                ))),
                "MONTH" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    paren_diff, Expression::Literal(Literal::Number("30.0".to_string())),
                ))),
                "YEAR" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    paren_diff, Expression::Literal(Literal::Number("365.0".to_string())),
                ))),
                _ => paren_diff,
            };
            // CAST(... AS INTEGER) truncates the fractional part.
            return Ok(Expression::Cast(Box::new(Cast {
                this: adjusted,
                to: DataType::Int { length: None, integer_spelling: true },
                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
            })));
        }

        // For other targets, remap to standard (unit, start, end) form
        let unit_ident = Expression::Identifier(Identifier::new(&unit_s));
        (unit_ident, date1, date2, unit_s)
    } else {
        let arg0 = args.remove(0);
        let arg1 = args.remove(0);
        let arg2 = args.remove(0);
        let unit_s = Self::get_unit_str_static(&arg0);
        (arg0, arg1, arg2, unit_s)
    };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark = matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark { Self::ensure_to_date_preserved(arg1) } else { arg1 };
            let d2 = if is_hive_spark { Self::ensure_to_date_preserved(arg2) } else { arg2 };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
            let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL accepts strings directly; no casts needed.
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL);
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![
                        Expression::string(&unit_str),
                        d1, d2,
                    ],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        fn cast_to_timestamp_ns(expr: Expression) -> Expression {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
                                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(Function::new(
                            "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg2)],
                        )));
                        let epoch_start = Expression::Function(Box::new(Function::new(
                            "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg1)],
                        )));
                        Ok(Expression::Sub(Box::new(BinaryOp::new(epoch_end, epoch_start))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        // DATE_TRUNC matches Snowflake's week-boundary counting.
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d1],
                        )));
                        let dt2 = Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d2],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string(&unit_str),
                                dt1, dt2,
                            ],
                        ))))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string(&unit_str),
                                d1, d2,
                            ],
                        ))))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![
                        Expression::string(&unit_str),
                        d1, d2,
                    ],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit = Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
            let cast_d1 = if is_hive_spark { Self::ensure_cast_date(arg1) }
                else if is_redshift_tsql { Self::force_cast_datetime(arg1) }
                else { Self::ensure_cast_datetime(arg1) };
            let cast_d2 = if is_hive_spark { Self::ensure_cast_date(arg2) }
                else if is_redshift_tsql { Self::force_cast_datetime(arg2) }
                else { Self::ensure_cast_datetime(arg2) };
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery's DATE_DIFF takes (end, start, unit) - hence cast_d2 first.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
            let d1 = if is_hive_spark { Self::double_cast_timestamp_date(arg1) }
                else if is_redshift_tsql { Self::force_cast_timestamp(arg1) }
                else { arg1 };
            let d2 = if is_hive_spark { Self::double_cast_timestamp_date(arg2) }
                else if is_redshift_tsql { Self::force_cast_timestamp(arg2) }
                else { arg2 };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::string(&unit_str),
                    d1, d2,
                ],
            ))))
        }
        DialectType::Hive => {
            // Hive's DATEDIFF is 2-arg (end, start) and counts days only;
            // MONTH/WEEK are emulated via MONTHS_BETWEEN / day-count ÷ 7.
            match unit_str.as_str() {
                "MONTH" => {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Function(Box::new(Function::new(
                            "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
                        ))),
                        to: DataType::Int { length: None, integer_spelling: false },
                        trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                    })))
                }
                "WEEK" => {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(), vec![arg2, arg1],
                            ))),
                            Expression::number(7),
                        ))),
                        to: DataType::Int { length: None, integer_spelling: false },
                        trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                    })))
                }
                _ => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(), vec![arg2, arg1],
                    ))))
                }
            }
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark supports the 3-arg DATEDIFF(unit, start, end) directly.
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
            let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, d1, d2],
            ))))
        }
    }
}
// DATEDIFF(end, start) - 2-arg form from Hive/MySQL
// (both return the day difference end - start).
"DATEDIFF" if f.args.len() == 2 => {
    let mut args = f.args;
    // arg0 = end date, arg1 = start date (Hive/MySQL argument order).
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE") && f.args.len() == 1 {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly.
            // Note cast_d0 is built from arg1 (start) and cast_d1 from arg0
            // (end): DATE_DIFF below takes (unit, start, end).
            let cast_d0 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::string("DAY"),
                    cast_d0, cast_d1,
                ],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): double_cast_timestamp_date is applied
                        // twice here for TO_DATE-wrapped args — confirm the
                        // doubled cast chain is the intended output.
                        let first_cast = Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![
                        Expression::string("DAY"),
                        cast_fn(arg1), cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![
                        Expression::string("DAY"),
                        arg1, arg0,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end) - identifier unit, swapped args.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, arg1, arg0],
            ))))
        }
        _ => {
            // Other targets keep the 2-arg (end, start) form unchanged.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![arg0, arg1],
            ))))
        }
    }
}
// DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
// Only the unit's spelling differs per target: string literal for
// DuckDB/Presto-family, bare identifier elsewhere.
"DATE_DIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Normalize however the unit was spelled (string, identifier, ...).
    let unit_str = Self::get_unit_str_static(&arg0);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_DIFF('UNIT', start, end)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::string(&unit_str),
                    arg1, arg2,
                ],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto-family also takes a string unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::string(&unit_str),
                    arg1, arg2,
                ],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift spell it DATEDIFF with an identifier unit.
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // Default: DATEDIFF(UNIT, start, end) with an identifier unit.
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, arg1, arg2],
            ))))
        }
    }
}
            // DATEADD(unit, val, date) - 3-arg form
            //
            // Normalizes TSQL unit abbreviations (YY, QQ, MM, ...) to standard unit
            // names, then renders the addition in each target dialect's native syntax:
            // DATEADD/DATE_ADD function calls, ADD_MONTHS, the dedicated DateAdd AST
            // node (MySQL), or `date + INTERVAL` arithmetic (DuckDB/Postgres).
            "DATEADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0); // unit (string or identifier)
                let arg1 = args.remove(0); // amount to add
                let arg2 = args.remove(0); // date/timestamp expression
                let unit_str = Self::get_unit_str_static(&arg0);

                // Normalize TSQL unit abbreviations to standard names
                let unit_str = match unit_str.as_str() {
                    "YY" | "YYYY" => "YEAR".to_string(),
                    "QQ" | "Q" => "QUARTER".to_string(),
                    "MM" | "M" => "MONTH".to_string(),
                    "WK" | "WW" => "WEEK".to_string(),
                    "DD" | "D" | "DY" => "DAY".to_string(),
                    "HH" => "HOUR".to_string(),
                    "MI" | "N" => "MINUTE".to_string(),
                    "SS" | "S" => "SECOND".to_string(),
                    "MS" => "MILLISECOND".to_string(),
                    "MCS" | "US" => "MICROSECOND".to_string(),
                    _ => unit_str,
                };
                match target {
                    DialectType::Snowflake => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        // Cast string literal to TIMESTAMP, but not for Snowflake source
                        // (Snowflake natively accepts string literals in DATEADD)
                        let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
                            && !matches!(source, DialectType::Snowflake) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                            }))
                        } else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::TSQL => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
                        let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
                            && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2, to: DataType::Custom { name: "DATETIME2".to_string() },
                                trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                            }))
                        } else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(UNIT, val, date), no extra casting.
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Databricks => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
                        // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
                        let func_name = if matches!(source, DialectType::TSQL | DialectType::Fabric | DialectType::Databricks | DialectType::Snowflake) {
                            "DATEADD"
                        } else {
                            "DATE_ADD"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // Special handling for NANOSECOND from Snowflake
                        if unit_str == "NANOSECOND" && matches!(source, DialectType::Snowflake) {
                            // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                            // The addition is performed on the epoch-nanoseconds integer
                            // rather than via an interval.
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
                                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                            }));
                            let epoch_ns = Expression::Function(Box::new(Function::new(
                                "EPOCH_NS".to_string(), vec![cast_ts],
                            )));
                            let sum = Expression::Add(Box::new(BinaryOp::new(epoch_ns, arg1)));
                            Ok(Expression::Function(Box::new(Function::new(
                                "MAKE_TIMESTAMP_NS".to_string(), vec![sum],
                            ))))
                        } else {
                            // DuckDB: convert to date + INTERVAL syntax with CAST
                            let iu = Self::parse_interval_unit_static(&unit_str);
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                            }));
                            // Cast string literal to TIMESTAMP
                            let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
                                Expression::Cast(Box::new(Cast {
                                    this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                }))
                            } else { arg2 };
                            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
                        }
                    }
                    DialectType::Spark => {
                        // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
                        // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
                        if matches!(source, DialectType::TSQL | DialectType::Fabric) {
                            // Multiplies a numeric literal in place when it parses as i64;
                            // otherwise emits an explicit `expr * factor` node.
                            fn multiply_expr_spark(expr: Expression, factor: i64) -> Expression {
                                if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                                    if let Ok(val) = n.parse::<i64>() {
                                        return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
                                    }
                                }
                                Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                    expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
                                )))
                            }
                            // NOTE(review): the abbreviation alternatives here ("YY",
                            // "QQ", ...) were already folded to long names by the
                            // normalization at the top of this arm, so only the
                            // long-form alternatives can actually match at this point.
                            let normalized_unit = match unit_str.as_str() {
                                "YEAR" | "YY" | "YYYY" => "YEAR",
                                "QUARTER" | "QQ" | "Q" => "QUARTER",
                                "MONTH" | "MM" | "M" => "MONTH",
                                "WEEK" | "WK" | "WW" => "WEEK",
                                "DAY" | "DD" | "D" | "DY" => "DAY",
                                _ => &unit_str,
                            };
                            match normalized_unit {
                                // YEAR/QUARTER are expressed in months for ADD_MONTHS.
                                "YEAR" => {
                                    let months = multiply_expr_spark(arg1, 12);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ADD_MONTHS".to_string(), vec![arg2, months],
                                    ))))
                                }
                                "QUARTER" => {
                                    let months = multiply_expr_spark(arg1, 3);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ADD_MONTHS".to_string(), vec![arg2, months],
                                    ))))
                                }
                                "MONTH" => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ADD_MONTHS".to_string(), vec![arg2, arg1],
                                    ))))
                                }
                                // WEEK becomes a day count for the 2-arg DATE_ADD.
                                "WEEK" => {
                                    let days = multiply_expr_spark(arg1, 7);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_ADD".to_string(), vec![arg2, days],
                                    ))))
                                }
                                "DAY" => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_ADD".to_string(), vec![arg2, arg1],
                                    ))))
                                }
                                // Sub-day and unknown units fall back to the 3-arg form.
                                _ => {
                                    let unit = Expression::Identifier(Identifier::new(&unit_str));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                                    ))))
                                }
                            }
                        } else {
                            // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                            let unit = Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::Hive => {
                        match unit_str.as_str() {
                            "MONTH" => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(), vec![arg2, arg1],
                                ))))
                            }
                            // NOTE(review): any non-MONTH unit is dropped here and the
                            // amount is treated as days by Hive's 2-arg DATE_ADD -
                            // confirm this is intended for units other than DAY.
                            _ => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(), vec![arg2, arg1],
                                ))))
                            }
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Cast string literal date to TIMESTAMP
                        let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                            }))
                        } else { arg2 };
                        // Presto family: DATE_ADD('UNIT', val, date) with a string unit.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::string(&unit_str),
                                arg1, arg2,
                            ],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL renders through the dedicated DateAdd AST node,
                        // i.e. DATE_ADD(date, INTERVAL val UNIT).
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                            this: arg2,
                            interval: arg1,
                            unit: iu,
                        })))
                    }
                    DialectType::PostgreSQL => {
                        // Cast string literal date to TIMESTAMP
                        let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                            }))
                        } else { arg2 };
                        // Postgres: `date + INTERVAL 'val UNIT'`, with the amount folded
                        // into the interval string.
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::string(&format!("{} {}", Self::expr_to_string_static(&arg1), unit_str))),
                            unit: None,
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL val UNIT).
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(arg1),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                        }));
                        // Non-TSQL sources: CAST string literal to DATETIME
                        let arg2 = if !matches!(source, DialectType::TSQL | DialectType::Fabric)
                            && matches!(&arg2, Expression::Literal(Literal::String(_)))
                        {
                            Expression::Cast(Box::new(Cast {
                                this: arg2, to: DataType::Custom { name: "DATETIME".to_string() },
                                trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                            }))
                        } else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![arg2, interval],
                        ))))
                    }
                    // Default: keep DATEADD(UNIT, val, date) with an identifier unit.
                    _ => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATE_ADD(unit, val, date) - 3-arg from ClickHouse/Presto/Spark
            //
            // Renders "add `val` `unit`s to `date`" in the target dialect's preferred
            // spelling (DATE_ADD / DATEADD / interval arithmetic).
            "DATE_ADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0); // unit (string or identifier)
                let arg1 = args.remove(0); // amount to add
                let arg2 = args.remove(0); // date/timestamp expression
                let unit_str = Self::get_unit_str_static(&arg0);

                match target {
                    // Presto family takes the unit as a quoted string literal.
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::string(&unit_str),
                                arg1, arg2,
                            ],
                        ))))
                    }
                    // DuckDB has no 3-arg DATE_ADD; emit `date + INTERVAL val UNIT`.
                    DialectType::DuckDB => {
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(arg1),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
                    }
                    // These dialects spell the function DATEADD with an identifier unit.
                    DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Spark => {
                        // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
                        if unit_str == "DAY" {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![arg2, arg1],
                            ))))
                        } else {
                            let unit = Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    // Databricks keeps the 3-arg DATE_ADD(UNIT, val, date) form.
                    DialectType::Databricks => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: DATE_ADD(date, val) for DAY
                        // NOTE(review): the unit is dropped here regardless of its
                        // value; presumably only DAY reaches this branch - confirm
                        // against the callers/tests.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![arg2, arg1],
                        ))))
                    }
                    // Default: DATE_ADD(UNIT, val, date) with an identifier unit.
                    _ => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATE_ADD(date, days) - 2-arg Hive/Spark form (add days)
            //
            // Only fires when the source dialect actually uses the 2-arg, day-based
            // form; 3-arg DATE_ADD from other dialects is handled by the arm above.
            "DATE_ADD" if f.args.len() == 2
                && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                match target {
                    DialectType::Hive | DialectType::Spark => {
                        // Keep as DATE_ADD(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![date, days],
                        ))))
                    }
                    DialectType::Databricks => {
                        // Databricks: DATEADD(DAY, days, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days, date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
                        let cast_date = Self::ensure_cast_date(date);
                        // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
                        // so the interval amount keeps its intended precedence.
                        let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
                            Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
                        } else { days };
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(interval_val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
                    }
                    DialectType::Snowflake => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            if matches!(date, Expression::Literal(Literal::String(_))) {
                                Self::double_cast_timestamp_date(date)
                            } else { date }
                        } else { date };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days, cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days, date), no extra casting.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days, date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
                        // But Databricks DATE_ADD doesn't need this wrapping for TSQL
                        let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark) {
                            if matches!(date, Expression::Literal(Literal::String(_))) {
                                Self::double_cast_datetime2_date(date)
                            } else { date }
                        } else { date };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days, cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            if matches!(date, Expression::Literal(Literal::String(_))) {
                                Self::double_cast_timestamp_date(date)
                            } else { date }
                        } else { date };
                        // Presto family: DATE_ADD('DAY', days, date) with a string unit.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::string("DAY"),
                                days, cast_date,
                            ],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
                        // (applied for these sources regardless of the date's expression kind,
                        // unlike the string-literal-only casts above)
                        let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            Self::double_cast_datetime_date(date)
                        } else { date };
                        // Wrap complex expressions in Paren for interval
                        let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
                            Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
                        } else { days };
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(interval_val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![cast_date, interval],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL days DAY) via the dedicated AST node.
                        let iu = crate::expressions::IntervalUnit::Day;
                        Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                            this: date,
                            interval: days,
                            unit: iu,
                        })))
                    }
                    DialectType::PostgreSQL => {
                        // Postgres: `date + INTERVAL 'days DAY'`, with the amount folded
                        // into the interval string.
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::string(&format!("{} DAY", Self::expr_to_string_static(&days)))),
                            unit: None,
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
                    }
                    // Default: pass the 2-arg form through unchanged.
                    _ => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![date, days],
                        ))))
                    }
                }
            }
            // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
            //
            // Most targets have no day-based DATE_SUB, so the subtraction is lowered
            // to the target's addition form with a negated amount (`days * -1`).
            "DATE_SUB" if f.args.len() == 2
                && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                // Helper to create days * -1
                let make_neg_days = |d: Expression| -> Expression {
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        d,
                        Expression::Literal(Literal::Number("-1".to_string())),
                    )))
                };
                // String-literal dates need explicit casts on several targets; decide once.
                let is_string_literal = matches!(date, Expression::Literal(Literal::String(_)));
                match target {
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                        // Keep as DATE_SUB(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_SUB".to_string(), vec![date, days],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY, with
                        // the negated amount parenthesized inside the interval.
                        let cast_date = Self::ensure_cast_date(date);
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(DAY, days * -1, date), double-casting
                        // string-literal dates through TIMESTAMP to DATE.
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else { date };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg, cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days * -1, date), no extra casting.
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg, date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(DAY, days * -1, date), double-casting
                        // string-literal dates through DATETIME2 to DATE.
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime2_date(date)
                        } else { date };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg, cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto family: DATE_ADD('DAY', days * -1, date) with a string unit.
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else { date };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::string("DAY"),
                                neg, cast_date,
                            ],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY).
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime_date(date)
                        } else { date };
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![cast_date, interval],
                        ))))
                    }
                    // Default: pass DATE_SUB(date, days) through unchanged.
                    _ => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_SUB".to_string(), vec![date, days],
                        ))))
                    }
                }
            }
            // ADD_MONTHS(date, val) -> target-specific
            //
            // Lowers the Oracle/Hive/Snowflake-style month-addition helper to each
            // target's native spelling; dialects with native ADD_MONTHS fall through
            // to the default pass-through arm.
            "ADD_MONTHS" if f.args.len() == 2 => {
                let mut args = f.args;
                let date = args.remove(0);
                let val = args.remove(0);
                match target {
                    DialectType::TSQL => {
                        // TSQL: DATEADD(MONTH, val, date), ensuring the date operand
                        // is cast to DATETIME2 first.
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val, cast_date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: date + INTERVAL val MONTH
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Month,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
                    }
                    DialectType::Snowflake => {
                        // Keep ADD_MONTHS when source is Snowflake
                        if matches!(source, DialectType::Snowflake) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![date, val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(), vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val, date,
                                ],
                            ))))
                        }
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(MONTH, val, date).
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val, date,
                            ],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto family: DATE_ADD('MONTH', val, date) with a string unit.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::string("MONTH"),
                                val, date,
                            ],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL val MONTH).
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Month,
                                use_plural: false,
                            }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![date, interval],
                        ))))
                    }
                    // Default: ADD_MONTHS passes through unchanged.
                    _ => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "ADD_MONTHS".to_string(), vec![date, val],
                        ))))
                    }
                }
            }
9498 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
9499 "DATETRUNC" if f.args.len() == 2 => {
9500 let mut args = f.args;
9501 let arg0 = args.remove(0);
9502 let arg1 = args.remove(0);
9503 let unit_str = Self::get_unit_str_static(&arg0);
9504 match target {
9505 DialectType::TSQL | DialectType::Fabric => {
9506 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
9507 Ok(Expression::Function(Box::new(Function::new(
9508 "DATETRUNC".to_string(), vec![
9509 Expression::Identifier(Identifier::new(&unit_str)),
9510 arg1,
9511 ],
9512 ))))
9513 }
9514 DialectType::DuckDB => {
9515 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
9516 let date = Self::ensure_cast_timestamp(arg1);
9517 Ok(Expression::Function(Box::new(Function::new(
9518 "DATE_TRUNC".to_string(), vec![
9519 Expression::string(&unit_str),
9520 date,
9521 ],
9522 ))))
9523 }
9524 DialectType::ClickHouse => {
9525 // ClickHouse: dateTrunc('UNIT', expr)
9526 Ok(Expression::Function(Box::new(Function::new(
9527 "dateTrunc".to_string(), vec![
9528 Expression::string(&unit_str),
9529 arg1,
9530 ],
9531 ))))
9532 }
9533 _ => {
9534 // Standard: DATE_TRUNC('UNIT', expr)
9535 let unit = Expression::string(&unit_str);
9536 Ok(Expression::Function(Box::new(Function::new(
9537 "DATE_TRUNC".to_string(), vec![unit, arg1],
9538 ))))
9539 }
9540 }
9541 }
9542 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
9543 "GETDATE" if f.args.is_empty() => {
9544 match target {
9545 DialectType::TSQL => Ok(Expression::Function(f)),
9546 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new("GETDATE".to_string(), vec![])))),
9547 _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
9548 precision: None,
9549 sysdate: false,
9550 })),
9551 }
9552 }
9553 // TO_HEX(x) / HEX(x) -> target-specific hex function
9554 "TO_HEX" | "HEX" if f.args.len() == 1 => {
9555 let name = match target {
9556 DialectType::Presto | DialectType::Trino => "TO_HEX",
9557 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "HEX",
9558 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift => "TO_HEX",
9559 _ => &f.name,
9560 };
9561 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9562 }
9563 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
9564 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
9565 match target {
9566 DialectType::BigQuery => {
9567 // BigQuery: UNHEX(x) -> FROM_HEX(x)
9568 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
9569 // because BigQuery MD5 returns BYTES, not hex string
9570 let arg = &f.args[0];
9571 let wrapped_arg = match arg {
9572 Expression::Function(inner_f) if inner_f.name.to_uppercase() == "MD5"
9573 || inner_f.name.to_uppercase() == "SHA1"
9574 || inner_f.name.to_uppercase() == "SHA256"
9575 || inner_f.name.to_uppercase() == "SHA512" => {
9576 // Wrap hash function in TO_HEX for BigQuery
9577 Expression::Function(Box::new(Function::new(
9578 "TO_HEX".to_string(), vec![arg.clone()],
9579 )))
9580 }
9581 _ => f.args.into_iter().next().unwrap(),
9582 };
9583 Ok(Expression::Function(Box::new(Function::new("FROM_HEX".to_string(), vec![wrapped_arg]))))
9584 }
9585 _ => {
9586 let name = match target {
9587 DialectType::Presto | DialectType::Trino => "FROM_HEX",
9588 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNHEX",
9589 _ => &f.name,
9590 };
9591 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9592 }
9593 }
9594 }
9595 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
9596 "TO_UTF8" if f.args.len() == 1 => {
9597 match target {
9598 DialectType::Spark | DialectType::Databricks => {
9599 let mut args = f.args;
9600 args.push(Expression::string("utf-8"));
9601 Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
9602 }
9603 _ => Ok(Expression::Function(f)),
9604 }
9605 }
9606 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
9607 "FROM_UTF8" if f.args.len() == 1 => {
9608 match target {
9609 DialectType::Spark | DialectType::Databricks => {
9610 let mut args = f.args;
9611 args.push(Expression::string("utf-8"));
9612 Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
9613 }
9614 _ => Ok(Expression::Function(f)),
9615 }
9616 }
9617 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
9618 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
9619 let name = match target {
9620 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
9621 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
9622 DialectType::PostgreSQL | DialectType::Redshift => "STARTS_WITH",
9623 _ => &f.name,
9624 };
9625 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9626 }
9627 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
9628 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
9629 let name = match target {
9630 DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_DISTINCT",
9631 _ => "APPROX_COUNT_DISTINCT",
9632 };
9633 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9634 }
            // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
            // NOTE(review): BigQuery sources are deliberately excluded by the guard -
            // presumably their JSON_EXTRACT path semantics are handled by a different
            // rule; confirm before relying on this exclusion.
            "JSON_EXTRACT" if f.args.len() == 2
                && !matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
                Ok(Expression::Function(Box::new(Function::new("GET_JSON_OBJECT".to_string(), f.args))))
            }
            // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
            "JSON_EXTRACT" if f.args.len() == 2
                && matches!(target, DialectType::SQLite) => {
                let mut args = f.args;
                let path = args.remove(1); // remove path first so index 0 stays valid
                let this = args.remove(0);
                // Build the dedicated JsonExtract node with `->` (arrow) syntax
                // enabled; all other JSON-path options stay at their defaults.
                Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                    this,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                })))
            }
            // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
            "JSON_FORMAT" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
                        // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
                        // The literal is wrapped in [...] so FROM_JSON can infer a schema
                        // for any JSON value, and the REGEXP_EXTRACT strips the added
                        // brackets back off the re-serialized text.
                        if matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                            if let Some(Expression::ParseJson(pj)) = f.args.first() {
                                if let Expression::Literal(Literal::String(s)) = &pj.this {
                                    let wrapped = Expression::Literal(Literal::String(format!("[{}]", s)));
                                    let schema_of_json = Expression::Function(Box::new(Function::new(
                                        "SCHEMA_OF_JSON".to_string(),
                                        vec![wrapped.clone()],
                                    )));
                                    let from_json = Expression::Function(Box::new(Function::new(
                                        "FROM_JSON".to_string(),
                                        vec![wrapped, schema_of_json],
                                    )));
                                    let to_json = Expression::Function(Box::new(Function::new(
                                        "TO_JSON".to_string(),
                                        vec![from_json],
                                    )));
                                    return Ok(Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT".to_string(),
                                        vec![
                                            to_json,
                                            Expression::Literal(Literal::String("^.(.*).$".to_string())),
                                            Expression::Literal(Literal::Number("1".to_string())),
                                        ],
                                    ))));
                                }
                            }
                        }

                        // Strip inner CAST(... AS JSON) or TO_JSON() if present
                        // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
                        let mut args = f.args;
                        if let Some(Expression::Cast(ref c)) = args.first() {
                            if matches!(&c.to, DataType::Json | DataType::JsonB) {
                                args = vec![c.this.clone()];
                            }
                        } else if let Some(Expression::Function(ref inner_f)) = args.first() {
                            if inner_f.name.eq_ignore_ascii_case("TO_JSON") && inner_f.args.len() == 1 {
                                // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                                args = inner_f.args.clone();
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: JSON_FORMAT(x) -> TO_JSON_STRING(x).
                        Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), f.args))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), f.args)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Other targets keep JSON_FORMAT unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
            "SYSDATE" if f.args.is_empty() => {
                match target {
                    // Oracle and Redshift support bare SYSDATE natively — pass through.
                    DialectType::Oracle | DialectType::Redshift => Ok(Expression::Function(f)),
                    DialectType::Snowflake => {
                        // Snowflake uses SYSDATE() with parens
                        let mut f = *f;
                        f.no_parens = false;
                        Ok(Expression::Function(Box::new(f)))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                        Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                            this: Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                                precision: None,
                                sysdate: false,
                            }),
                            zone: Expression::Literal(Literal::String("UTC".to_string())),
                        })))
                    }
                    // All other targets: CURRENT_TIMESTAMP with sysdate=true.
                    // NOTE(review): presumably the flag lets generators round-trip
                    // SYSDATE-style spelling — confirm against the generator code.
                    _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: true,
                    })),
                }
            }
9752 // LOGICAL_OR(x) -> BOOL_OR(x)
9753 "LOGICAL_OR" if f.args.len() == 1 => {
9754 let name = match target {
9755 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
9756 _ => &f.name,
9757 };
9758 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9759 }
9760 // LOGICAL_AND(x) -> BOOL_AND(x)
9761 "LOGICAL_AND" if f.args.len() == 1 => {
9762 let name = match target {
9763 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
9764 _ => &f.name,
9765 };
9766 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9767 }
9768 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
9769 "MONTHS_ADD" if f.args.len() == 2 => {
9770 match target {
9771 DialectType::Oracle => {
9772 Ok(Expression::Function(Box::new(Function::new("ADD_MONTHS".to_string(), f.args))))
9773 }
9774 _ => Ok(Expression::Function(f)),
9775 }
9776 }
9777 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
9778 "ARRAY_JOIN" if f.args.len() >= 2 => {
9779 match target {
9780 DialectType::Spark | DialectType::Databricks => {
9781 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
9782 Ok(Expression::Function(f))
9783 }
9784 DialectType::Hive => {
9785 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
9786 let mut args = f.args;
9787 let arr = args.remove(0);
9788 let sep = args.remove(0);
9789 // Drop any remaining args (null_replacement)
9790 Ok(Expression::Function(Box::new(Function::new("CONCAT_WS".to_string(), vec![sep, arr]))))
9791 }
9792 DialectType::Presto | DialectType::Trino => {
9793 Ok(Expression::Function(f))
9794 }
9795 _ => Ok(Expression::Function(f)),
9796 }
9797 }
            // LOCATE(substr, str, pos) 3-arg -> target-specific
            // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
            "LOCATE" if f.args.len() == 3 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
                let mut args = f.args;
                let substr = args.remove(0);
                let string = args.remove(0);
                let pos = args.remove(0);
                // Search only the tail that starts at `pos`:
                // STRPOS(SUBSTRING(string, pos), substr)
                let substring_call = Expression::Function(Box::new(Function::new(
                    "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
                )));
                let strpos_call = Expression::Function(Box::new(Function::new(
                    "STRPOS".to_string(), vec![substring_call, substr.clone()],
                )));
                // Map the tail-relative (1-based) hit back to a whole-string index:
                // STRPOS(...) + pos - 1
                let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        pos.clone(),
                    ))),
                    Expression::number(1),
                )));
                // Preserve the "not found" sentinel of 0 (without this, a miss
                // would be offset to pos - 1): STRPOS(...) = 0
                let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    Expression::number(0),
                )));

                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
                        Ok(Expression::Function(Box::new(Function::new(
                            "IF".to_string(),
                            vec![is_zero, Expression::number(0), pos_adjusted],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![
                                (is_zero, Expression::number(0)),
                            ],
                            else_: Some(pos_adjusted),
                        })))
                    }
                    // Unreachable in practice: the outer guard already restricts
                    // `target` to the four dialects matched above. Kept as a safe
                    // fallback that reconstructs the original call.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LOCATE".to_string(), vec![substr, string, pos],
                    )))),
                }
            }
9849 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
9850 "STRPOS" if f.args.len() == 3
9851 && matches!(target, DialectType::BigQuery | DialectType::Oracle | DialectType::Teradata) => {
9852 let mut args = f.args;
9853 let haystack = args.remove(0);
9854 let needle = args.remove(0);
9855 let occurrence = args.remove(0);
9856 Ok(Expression::Function(Box::new(Function::new(
9857 "INSTR".to_string(),
9858 vec![haystack, needle, Expression::number(1), occurrence],
9859 ))))
9860 }
            // SCHEMA_NAME(id) -> target-specific
            // (a second, unguarded SCHEMA_NAME arm further down catches calls
            // with 2+ arguments, which this guard excludes)
            "SCHEMA_NAME" if f.args.len() <= 1 => {
                match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // Current-schema function; note any id argument is dropped.
                        Ok(Expression::Function(Box::new(Function::new("SCHEMA".to_string(), vec![]))))
                    }
                    DialectType::PostgreSQL => {
                        Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
                    }
                    DialectType::SQLite => {
                        // SQLite's primary attached database is always named 'main'.
                        Ok(Expression::string("main"))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
9876 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
9877 "STRTOL" if f.args.len() == 2 => {
9878 match target {
9879 DialectType::Presto | DialectType::Trino => {
9880 Ok(Expression::Function(Box::new(Function::new("FROM_BASE".to_string(), f.args))))
9881 }
9882 _ => Ok(Expression::Function(f)),
9883 }
9884 }
9885 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
9886 "EDITDIST3" if f.args.len() == 2 => {
9887 match target {
9888 DialectType::Spark | DialectType::Databricks => {
9889 Ok(Expression::Function(Box::new(Function::new("LEVENSHTEIN".to_string(), f.args))))
9890 }
9891 _ => Ok(Expression::Function(f)),
9892 }
9893 }
            // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
            "FORMAT" if f.args.len() == 2
                && matches!(source, DialectType::MySQL | DialectType::SingleStore)
                && matches!(target, DialectType::DuckDB) => {
                let mut args = f.args;
                let num_expr = args.remove(0);
                let decimals_expr = args.remove(0);
                // Extract decimal count from a numeric literal.
                // NOTE(review): a non-literal decimals argument silently falls
                // back to 0 decimal places — confirm this lossy default is intended.
                let dec_count = match &decimals_expr {
                    Expression::Literal(Literal::Number(n)) => n.clone(),
                    _ => "0".to_string(),
                };
                // fmt-style spec: ',' = thousands separator, '.Xf' = fixed decimals.
                let fmt_str = format!("{{:,.{}f}}", dec_count);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![Expression::string(&fmt_str), num_expr],
                ))))
            }
            // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
            "FORMAT" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
                let val_expr = f.args[0].clone();
                let fmt_expr = f.args[1].clone();
                // Expand unambiguous .NET single-char date format shortcodes to full patterns.
                // Only expand shortcodes that are NOT also valid numeric format specifiers.
                // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
                // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
                let (expanded_fmt, is_shortcode) = match &fmt_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        match s.as_str() {
                            "m" | "M" => (Expression::string("MMMM d"), true),
                            "t" => (Expression::string("h:mm tt"), true),
                            "T" => (Expression::string("h:mm:ss tt"), true),
                            "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                            _ => (fmt_expr.clone(), false),
                        }
                    }
                    _ => (fmt_expr.clone(), false),
                };
                // Check if the format looks like a date format.
                // Heuristic: shortcodes are always dates; otherwise look for common
                // date/time pattern tokens in the literal.
                let is_date_format = is_shortcode || match &expanded_fmt {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                        s.contains("yyyy") || s.contains("YYYY") || s.contains("MM")
                            || s.contains("dd") || s.contains("MMMM") || s.contains("HH")
                            || s.contains("hh") || s.contains("ss")
                    }
                    _ => false,
                };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Dates use DATE_FORMAT; everything else is treated as a
                        // numeric format and mapped to FORMAT_NUMBER.
                        let func_name = if is_date_format {
                            "DATE_FORMAT"
                        } else {
                            "FORMAT_NUMBER"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(), vec![val_expr, expanded_fmt],
                        ))))
                    }
                    _ => {
                        // For TSQL and other targets, expand shortcodes but keep FORMAT
                        if is_shortcode {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT".to_string(), vec![val_expr, expanded_fmt],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                }
            }
            // FORMAT('%s', x) from Trino/Presto -> target-specific
            "FORMAT" if f.args.len() >= 2
                && matches!(source, DialectType::Trino | DialectType::Presto | DialectType::Athena) => {
                // First arg is the printf-style format string; the rest are values.
                let fmt_expr = f.args[0].clone();
                let value_args: Vec<Expression> = f.args[1..].to_vec();
                match target {
                    // DuckDB: replace %s with {} in format string
                    // (DuckDB FORMAT uses fmt-style {} placeholders, not printf %s)
                    DialectType::DuckDB => {
                        let new_fmt = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => {
                                Expression::Literal(Literal::String(s.replace("%s", "{}")))
                            }
                            _ => fmt_expr,
                        };
                        let mut args = vec![new_fmt];
                        args.extend(value_args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(), args,
                        ))))
                    }
                    // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
                    // (only the trivial single-value pass-through case is rewritten)
                    DialectType::Snowflake => {
                        match &fmt_expr {
                            Expression::Literal(Literal::String(s)) if s == "%s" && value_args.len() == 1 => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TO_CHAR".to_string(), value_args,
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    // Default: keep FORMAT as-is
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
            "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" if f.args.len() == 2 => {
                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
                        // The COALESCE forces "no match" to FALSE instead of NULL;
                        // the outer CASE preserves NULL for a NULL needle.
                        let arr = f.args[0].clone();
                        let needle = f.args[1].clone();
                        // Convert [] to ARRAY[] for PostgreSQL
                        let pg_arr = match arr {
                            Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                expressions: a.expressions,
                                bracket_notation: false,
                                use_list_keyword: false,
                            })),
                            _ => arr,
                        };
                        // needle = ANY(arr) using the Any quantified expression
                        let any_expr = Expression::Any(Box::new(crate::expressions::QuantifiedExpr {
                            this: needle.clone(),
                            subquery: pg_arr,
                            op: Some(crate::expressions::QuantifiedOp::Eq),
                        }));
                        let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
                            expressions: vec![any_expr, Expression::Boolean(crate::expressions::BooleanLiteral { value: false })],
                            original_name: None,
                        }));
                        let is_null_check = Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: needle,
                            not: false,
                            postfix_form: false,
                        }));
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(is_null_check, Expression::Null(crate::expressions::Null))],
                            else_: Some(coalesce),
                        })))
                    }
                    // All other targets: normalize the three aliases to ARRAY_CONTAINS.
                    _ => {
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
                    }
                }
            }
10042 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
10043 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
10044 match target {
10045 DialectType::PostgreSQL | DialectType::Redshift => {
10046 // arr1 && arr2 with ARRAY[] syntax
10047 let mut args = f.args;
10048 let arr1 = args.remove(0);
10049 let arr2 = args.remove(0);
10050 let pg_arr1 = match arr1 {
10051 Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
10052 expressions: a.expressions,
10053 bracket_notation: false,
10054 use_list_keyword: false,
10055 })),
10056 _ => arr1,
10057 };
10058 let pg_arr2 = match arr2 {
10059 Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
10060 expressions: a.expressions,
10061 bracket_notation: false,
10062 use_list_keyword: false,
10063 })),
10064 _ => arr2,
10065 };
10066 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(pg_arr1, pg_arr2))))
10067 }
10068 DialectType::DuckDB => {
10069 // DuckDB: arr1 && arr2 (native support)
10070 let mut args = f.args;
10071 let arr1 = args.remove(0);
10072 let arr2 = args.remove(0);
10073 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(arr1, arr2))))
10074 }
10075 _ => Ok(Expression::Function(Box::new(Function::new("LIST_HAS_ANY".to_string(), f.args)))),
10076 }
10077 }
10078 // APPROX_QUANTILE(x, q) -> target-specific
10079 "APPROX_QUANTILE" if f.args.len() == 2 => {
10080 match target {
10081 DialectType::Snowflake => {
10082 Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), f.args))))
10083 }
10084 DialectType::DuckDB => {
10085 Ok(Expression::Function(f))
10086 }
10087 _ => Ok(Expression::Function(f)),
10088 }
10089 }
10090 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
10091 "MAKE_DATE" if f.args.len() == 3 => {
10092 match target {
10093 DialectType::BigQuery => {
10094 Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), f.args))))
10095 }
10096 _ => Ok(Expression::Function(f)),
10097 }
10098 }
10099 // RANGE(start, end[, step]) -> target-specific
10100 "RANGE" if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) => {
10101 let start = f.args[0].clone();
10102 let end = f.args[1].clone();
10103 let step = f.args.get(2).cloned();
10104 match target {
10105 DialectType::Spark | DialectType::Databricks => {
10106 // RANGE(start, end) -> SEQUENCE(start, end-1)
10107 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
10108 // RANGE(start, start) -> ARRAY() (empty)
10109 // RANGE(start, end, 0) -> ARRAY() (empty)
10110 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
10111
10112 // Check for constant args
10113 fn extract_i64(e: &Expression) -> Option<i64> {
10114 match e {
10115 Expression::Literal(Literal::Number(n)) => n.parse::<i64>().ok(),
10116 Expression::Neg(u) => {
10117 if let Expression::Literal(Literal::Number(n)) = &u.this {
10118 n.parse::<i64>().ok().map(|v| -v)
10119 } else { None }
10120 }
10121 _ => None,
10122 }
10123 }
10124 let start_val = extract_i64(&start);
10125 let end_val = extract_i64(&end);
10126 let step_val = step.as_ref().and_then(|s| extract_i64(s));
10127
10128 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
10129 if step_val == Some(0) {
10130 return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
10131 }
10132 if let (Some(s), Some(e_val)) = (start_val, end_val) {
10133 if s == e_val {
10134 return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
10135 }
10136 }
10137
10138 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
10139 // All constants - compute new end = end - step (if step provided) or end - 1
10140 match step_val {
10141 Some(st) if st < 0 => {
10142 // Negative step: SEQUENCE(start, end - step, step)
10143 let new_end = e_val - st; // end - step (= end + |step|)
10144 let mut args = vec![start, Expression::number(new_end)];
10145 if let Some(s) = step { args.push(s); }
10146 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
10147 }
10148 Some(st) => {
10149 let new_end = e_val - st;
10150 let mut args = vec![start, Expression::number(new_end)];
10151 if let Some(s) = step { args.push(s); }
10152 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
10153 }
10154 None => {
10155 // No step: SEQUENCE(start, end - 1)
10156 let new_end = e_val - 1;
10157 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), vec![start, Expression::number(new_end)]))))
10158 }
10159 }
10160 } else {
10161 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
10162 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))));
10163 let cond = Expression::Lte(Box::new(BinaryOp::new(
10164 Expression::Paren(Box::new(Paren { this: end_m1.clone(), trailing_comments: Vec::new() })),
10165 start.clone(),
10166 )));
10167 let empty = Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![])));
10168 let mut seq_args = vec![start, Expression::Paren(Box::new(Paren { this: end_m1, trailing_comments: Vec::new() }))];
10169 if let Some(s) = step { seq_args.push(s); }
10170 let seq = Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), seq_args)));
10171 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10172 condition: cond,
10173 true_value: empty,
10174 false_value: Some(seq),
10175 original_name: None,
10176 })))
10177 }
10178 }
10179 DialectType::SQLite => {
10180 // RANGE(start, end) -> GENERATE_SERIES(start, end)
10181 // The subquery wrapping is handled at the Alias level
10182 let mut args = vec![start, end];
10183 if let Some(s) = step { args.push(s); }
10184 Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), args))))
10185 }
10186 _ => Ok(Expression::Function(f)),
10187 }
10188 }
10189 // ARRAY_REVERSE_SORT -> target-specific
10190 // (handled above as well, but also need DuckDB self-normalization)
10191 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
10192 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
10193 match target {
10194 DialectType::Snowflake => {
10195 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
10196 }
10197 DialectType::Spark | DialectType::Databricks => {
10198 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
10199 }
10200 _ => {
10201 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
10202 }
10203 }
10204 }
10205 // VARIANCE(x) -> varSamp(x) for ClickHouse
10206 "VARIANCE" if f.args.len() == 1 => {
10207 match target {
10208 DialectType::ClickHouse => {
10209 Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), f.args))))
10210 }
10211 _ => Ok(Expression::Function(f)),
10212 }
10213 }
10214 // STDDEV(x) -> stddevSamp(x) for ClickHouse
10215 "STDDEV" if f.args.len() == 1 => {
10216 match target {
10217 DialectType::ClickHouse => {
10218 Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), f.args))))
10219 }
10220 _ => Ok(Expression::Function(f)),
10221 }
10222 }
10223 // ISINF(x) -> IS_INF(x) for BigQuery
10224 "ISINF" if f.args.len() == 1 => {
10225 match target {
10226 DialectType::BigQuery => {
10227 Ok(Expression::Function(Box::new(Function::new("IS_INF".to_string(), f.args))))
10228 }
10229 _ => Ok(Expression::Function(f)),
10230 }
10231 }
10232 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
10233 "CONTAINS" if f.args.len() == 2 => {
10234 match target {
10235 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
10236 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
10237 }
10238 _ => Ok(Expression::Function(f)),
10239 }
10240 }
            // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
            "ARRAY_CONTAINS" if f.args.len() == 2 => {
                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
                    }
                    DialectType::DuckDB => {
                        // Same name, but the node is rebuilt via Function::new.
                        // NOTE(review): unlike the `_` arm below, this discards any
                        // non-argument fields carried on the original Function —
                        // confirm this normalization is intended for DuckDB.
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
10253 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
10254 "TO_UNIXTIME" if f.args.len() == 1 => {
10255 match target {
10256 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
10257 Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
10258 }
10259 _ => Ok(Expression::Function(f)),
10260 }
10261 }
10262 // FROM_UNIXTIME(x) -> target-specific
10263 "FROM_UNIXTIME" if f.args.len() == 1 => {
10264 match target {
10265 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10266 | DialectType::Presto | DialectType::Trino => {
10267 Ok(Expression::Function(f))
10268 }
10269 DialectType::DuckDB => {
10270 // DuckDB: TO_TIMESTAMP(x)
10271 let arg = f.args.into_iter().next().unwrap();
10272 Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
10273 }
10274 DialectType::PostgreSQL => {
10275 // PG: TO_TIMESTAMP(col)
10276 let arg = f.args.into_iter().next().unwrap();
10277 Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
10278 }
10279 DialectType::Redshift => {
10280 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
10281 let arg = f.args.into_iter().next().unwrap();
10282 let epoch_ts = Expression::Literal(Literal::Timestamp("epoch".to_string()));
10283 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
10284 this: Some(Expression::string("1 SECOND")),
10285 unit: None,
10286 }));
10287 let mul = Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
10288 let add = Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
10289 Ok(Expression::Paren(Box::new(crate::expressions::Paren { this: add, trailing_comments: Vec::new() })))
10290 }
10291 _ => Ok(Expression::Function(f)),
10292 }
10293 }
            // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
            "FROM_UNIXTIME" if f.args.len() == 2
                && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
                let mut args = f.args;
                let unix_ts = args.remove(0);
                let fmt_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
                        let to_ts = Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![unix_ts])));
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                            // Literal format: translate Hive tokens to C strftime tokens.
                            let c_fmt = Self::hive_format_to_c_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(), vec![to_ts, Expression::string(&c_fmt)],
                            ))))
                        } else {
                            // Non-literal format: pass through untranslated.
                            // NOTE(review): the Hive-style pattern will reach DuckDB
                            // verbatim in this case — confirm acceptable.
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(), vec![to_ts, fmt_expr],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
                        let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![unix_ts])));
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                            // Literal format: translate Hive tokens to Presto tokens.
                            let p_fmt = Self::hive_format_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(), vec![from_unix, Expression::string(&p_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(), vec![from_unix, fmt_expr],
                            ))))
                        }
                    }
                    _ => {
                        // Keep as FROM_UNIXTIME(x, fmt) for other targets
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(), vec![unix_ts, fmt_expr],
                        ))))
                    }
                }
            }
10337 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
10338 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
10339 let unit_str = Self::get_unit_str_static(&f.args[0]);
10340 // Get the raw unit text preserving original case
10341 let raw_unit = match &f.args[0] {
10342 Expression::Identifier(id) => id.name.clone(),
10343 Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
10344 Expression::Column(col) => col.name.name.clone(),
10345 _ => unit_str.clone(),
10346 };
10347 match target {
10348 DialectType::TSQL | DialectType::Fabric => {
10349 // Preserve original case of unit for TSQL
10350 let unit_name = match unit_str.as_str() {
10351 "YY" | "YYYY" => "YEAR".to_string(),
10352 "QQ" | "Q" => "QUARTER".to_string(),
10353 "MM" | "M" => "MONTH".to_string(),
10354 "WK" | "WW" => "WEEK".to_string(),
10355 "DD" | "D" | "DY" => "DAY".to_string(),
10356 "HH" => "HOUR".to_string(),
10357 "MI" | "N" => "MINUTE".to_string(),
10358 "SS" | "S" => "SECOND".to_string(),
10359 _ => raw_unit.clone(), // preserve original case
10360 };
10361 let mut args = f.args;
10362 args[0] = Expression::Identifier(Identifier::new(&unit_name));
10363 Ok(Expression::Function(Box::new(Function::new("DATEPART".to_string(), args))))
10364 }
10365 DialectType::Spark | DialectType::Databricks => {
10366 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
10367 // Preserve original case for non-abbreviation units
10368 let unit = match unit_str.as_str() {
10369 "YY" | "YYYY" => "YEAR".to_string(),
10370 "QQ" | "Q" => "QUARTER".to_string(),
10371 "MM" | "M" => "MONTH".to_string(),
10372 "WK" | "WW" => "WEEK".to_string(),
10373 "DD" | "D" | "DY" => "DAY".to_string(),
10374 "HH" => "HOUR".to_string(),
10375 "MI" | "N" => "MINUTE".to_string(),
10376 "SS" | "S" => "SECOND".to_string(),
10377 _ => raw_unit, // preserve original case
10378 };
10379 Ok(Expression::Extract(Box::new(crate::expressions::ExtractFunc {
10380 this: f.args[1].clone(),
10381 field: crate::expressions::DateTimeField::Custom(unit),
10382 })))
10383 }
10384 _ => {
10385 Ok(Expression::Function(Box::new(Function::new("DATE_PART".to_string(), f.args))))
10386 }
10387 }
10388 }
            // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
            // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
            // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
            // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
            "DATENAME" if f.args.len() == 2 => {
                let unit_str = Self::get_unit_str_static(&f.args[0]);
                let date_expr = f.args[1].clone();
                // Only the month-name and weekday-name units are rewritten;
                // all other units fall through unchanged at the bottom.
                match unit_str.as_str() {
                    "MM" | "M" | "MONTH" => {
                        match target {
                            DialectType::TSQL => {
                                // Cast to DATETIME2 so FORMAT accepts string dates too.
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Custom { name: "DATETIME2".to_string() },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Timestamp { timezone: false, precision: None },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    "DW" | "WEEKDAY" => {
                        match target {
                            DialectType::TSQL => {
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Custom { name: "DATETIME2".to_string() },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FORMAT".to_string(), vec![cast_date, Expression::string("dddd")],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark's pattern for the full weekday name is EEEE
                                // (vs .NET's dddd used in the TSQL branch above).
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Timestamp { timezone: false, precision: None },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("EEEE")],
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
10462 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
10463 "STRING_AGG" if f.args.len() >= 2 => {
10464 let x = f.args[0].clone();
10465 let sep = f.args[1].clone();
10466 match target {
10467 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
10468 Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
10469 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
10470 })))
10471 }
10472 DialectType::SQLite => {
10473 Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
10474 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
10475 })))
10476 }
10477 DialectType::PostgreSQL | DialectType::Redshift => {
10478 Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
10479 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None, limit: None,
10480 })))
10481 }
10482 _ => Ok(Expression::Function(f)),
10483 }
10484 }
10485 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
10486 "JSON_ARRAYAGG" => {
10487 match target {
10488 DialectType::PostgreSQL => {
10489 Ok(Expression::Function(Box::new(Function { name: "JSON_AGG".to_string(), ..(*f) })))
10490 }
10491 _ => Ok(Expression::Function(f)),
10492 }
10493 }
            // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
            // Only reached with 2+ arguments: the earlier guarded SCHEMA_NAME arm
            // consumes every call with f.args.len() <= 1.
            // NOTE(review): unlike that arm, this one has no MySQL/SingleStore
            // mapping — confirm whether the omission is intentional.
            "SCHEMA_NAME" => {
                match target {
                    DialectType::PostgreSQL => {
                        Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
                    }
                    DialectType::SQLite => {
                        // SQLite's primary attached database is always named 'main'.
                        Ok(Expression::string("main"))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
            "TO_TIMESTAMP" if f.args.len() == 2
                && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
                && matches!(target, DialectType::DuckDB) => {
                let mut args = f.args;
                let val = args.remove(0);
                let fmt_expr = args.remove(0);
                if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
                    // Convert Java/Spark format to C strptime format
                    fn java_to_c_fmt(fmt: &str) -> String {
                        // Pass 1: longest-token-first replacements so e.g. "yyyy"
                        // is consumed before the trailing "yy" rule can see it.
                        let result = fmt
                            .replace("yyyy", "%Y")
                            .replace("SSSSSS", "%f")
                            .replace("EEEE", "%W")
                            .replace("MM", "%m")
                            .replace("dd", "%d")
                            .replace("HH", "%H")
                            .replace("mm", "%M")
                            .replace("ss", "%S")
                            .replace("yy", "%y");
                        // Pass 2: map the timezone tokens z -> %Z and Z -> %z,
                        // skipping over the two-character %X sequences already
                        // emitted so their letters are not re-translated.
                        let mut out = String::new();
                        let chars: Vec<char> = result.chars().collect();
                        let mut i = 0;
                        while i < chars.len() {
                            if chars[i] == '%' && i + 1 < chars.len() {
                                out.push(chars[i]);
                                out.push(chars[i + 1]);
                                i += 2;
                            } else if chars[i] == 'z' {
                                out.push_str("%Z");
                                i += 1;
                            } else if chars[i] == 'Z' {
                                out.push_str("%z");
                                i += 1;
                            } else {
                                out.push(chars[i]);
                                i += 1;
                            }
                        }
                        out
                    }
                    let c_fmt = java_to_c_fmt(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, Expression::string(&c_fmt)],
                    ))))
                } else {
                    // Non-literal format: pass through untranslated.
                    // NOTE(review): a Java-style pattern will reach DuckDB's
                    // STRPTIME verbatim here — confirm acceptable.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, fmt_expr],
                    ))))
                }
            }
10559 // TO_DATE(x) 1-arg from Doris: date conversion
10560 "TO_DATE" if f.args.len() == 1
10561 && matches!(source, DialectType::Doris | DialectType::StarRocks) => {
10562 let arg = f.args.into_iter().next().unwrap();
10563 match target {
10564 DialectType::Oracle | DialectType::DuckDB | DialectType::TSQL => {
10565 // CAST(x AS DATE)
10566 Ok(Expression::Cast(Box::new(Cast {
10567 this: arg,
10568 to: DataType::Date,
10569 double_colon_syntax: false,
10570 trailing_comments: vec![],
10571 format: None,
10572 default: None,
10573 })))
10574 }
10575 DialectType::MySQL | DialectType::SingleStore => {
10576 // DATE(x)
10577 Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), vec![arg]))))
10578 }
10579 _ => {
10580 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
10581 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
10582 }
10583 }
10584 }
10585 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
10586 "TO_DATE" if f.args.len() == 1
10587 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
10588 let arg = f.args.into_iter().next().unwrap();
10589 match target {
10590 DialectType::DuckDB => {
10591 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
10592 Ok(Expression::TryCast(Box::new(Cast {
10593 this: arg,
10594 to: DataType::Date,
10595 double_colon_syntax: false,
10596 trailing_comments: vec![],
10597 format: None,
10598 default: None,
10599 })))
10600 }
10601 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10602 // CAST(CAST(x AS TIMESTAMP) AS DATE)
10603 Ok(Self::double_cast_timestamp_date(arg))
10604 }
10605 DialectType::Snowflake => {
10606 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
10607 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
10608 Ok(Expression::Function(Box::new(Function::new(
10609 "TRY_TO_DATE".to_string(),
10610 vec![arg, Expression::string("yyyy-mm-DD")],
10611 ))))
10612 }
10613 _ => {
10614 // Default: keep as TO_DATE(x)
10615 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
10616 }
10617 }
10618 }
10619 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
10620 "TO_DATE" if f.args.len() == 2
10621 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
10622 let mut args = f.args;
10623 let val = args.remove(0);
10624 let fmt_expr = args.remove(0);
10625 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
10626
10627 if is_default_format {
10628 // Default format: same as 1-arg form
10629 match target {
10630 DialectType::DuckDB => {
10631 Ok(Expression::TryCast(Box::new(Cast {
10632 this: val,
10633 to: DataType::Date,
10634 double_colon_syntax: false,
10635 trailing_comments: vec![],
10636 format: None,
10637 default: None,
10638 })))
10639 }
10640 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10641 Ok(Self::double_cast_timestamp_date(val))
10642 }
10643 DialectType::Snowflake => {
10644 // TRY_TO_DATE(x, format) with Snowflake format mapping
10645 let sf_fmt = "yyyy-MM-dd".replace("yyyy", "yyyy").replace("MM", "mm").replace("dd", "DD");
10646 Ok(Expression::Function(Box::new(Function::new(
10647 "TRY_TO_DATE".to_string(),
10648 vec![val, Expression::string(&sf_fmt)],
10649 ))))
10650 }
10651 _ => {
10652 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val]))))
10653 }
10654 }
10655 } else {
10656 // Non-default format: use format-based parsing
10657 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
10658 match target {
10659 DialectType::DuckDB => {
10660 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
// Convert a Java/Spark SimpleDateFormat pattern to a C strptime pattern
// for the TO_DATE(x, fmt) -> TRY_STRPTIME rewrite.
//
// The `replace` chain rewrites multi-character tokens; order matters:
// `yyyy` must be handled before `yy`, and `MM`/`mm` are case-sensitive
// (month vs. minute). The character pass afterwards maps the
// single-character timezone tokens, skipping over already-emitted `%x`
// escapes so their letters are not mistaken for bare tokens.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the full weekday name -> strptime `%A`.
        // (`%W` would be the Monday-based week-of-year number, not a day name.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an existing escape verbatim so its letter is not
            // re-interpreted as a timezone token below.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            // Java `z`: timezone name (e.g. "PST") -> `%Z`.
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            // Java `Z`: RFC 822 numeric offset (e.g. "-0800") -> `%z`.
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
10693 let c_fmt = java_to_c_fmt_todate(s);
10694 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
10695 let try_strptime = Expression::Function(Box::new(Function::new(
10696 "TRY_STRPTIME".to_string(),
10697 vec![val, Expression::string(&c_fmt)],
10698 )));
10699 let cast_ts = Expression::Cast(Box::new(Cast {
10700 this: try_strptime,
10701 to: DataType::Timestamp { precision: None, timezone: false },
10702 double_colon_syntax: false,
10703 trailing_comments: vec![],
10704 format: None,
10705 default: None,
10706 }));
10707 Ok(Expression::Cast(Box::new(Cast {
10708 this: cast_ts,
10709 to: DataType::Date,
10710 double_colon_syntax: false,
10711 trailing_comments: vec![],
10712 format: None,
10713 default: None,
10714 })))
10715 }
10716 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10717 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
10718 let p_fmt = s
10719 .replace("yyyy", "%Y")
10720 .replace("SSSSSS", "%f")
10721 .replace("MM", "%m")
10722 .replace("dd", "%d")
10723 .replace("HH", "%H")
10724 .replace("mm", "%M")
10725 .replace("ss", "%S")
10726 .replace("yy", "%y");
10727 let date_parse = Expression::Function(Box::new(Function::new(
10728 "DATE_PARSE".to_string(),
10729 vec![val, Expression::string(&p_fmt)],
10730 )));
10731 Ok(Expression::Cast(Box::new(Cast {
10732 this: date_parse,
10733 to: DataType::Date,
10734 double_colon_syntax: false,
10735 trailing_comments: vec![],
10736 format: None,
10737 default: None,
10738 })))
10739 }
10740 DialectType::Snowflake => {
10741 // TRY_TO_DATE(x, snowflake_fmt)
10742 Ok(Expression::Function(Box::new(Function::new(
10743 "TRY_TO_DATE".to_string(),
10744 vec![val, Expression::string(s)],
10745 ))))
10746 }
10747 _ => {
10748 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10749 }
10750 }
10751 } else {
10752 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10753 }
10754 }
10755 }
10756 // TO_TIMESTAMP(x) 1-arg: epoch conversion
10757 "TO_TIMESTAMP" if f.args.len() == 1
10758 && matches!(source, DialectType::DuckDB)
10759 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10760 | DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::Athena) => {
10761 let arg = f.args.into_iter().next().unwrap();
10762 let func_name = match target {
10763 DialectType::BigQuery => "TIMESTAMP_SECONDS",
10764 DialectType::Presto | DialectType::Trino | DialectType::Athena
10765 | DialectType::Hive | DialectType::Spark | DialectType::Databricks => "FROM_UNIXTIME",
10766 _ => "TO_TIMESTAMP",
10767 };
10768 Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![arg]))))
10769 }
10770 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
10771 "CONCAT" if f.args.len() == 1
10772 && matches!(target, DialectType::Spark | DialectType::Databricks) => {
10773 let arg = f.args.into_iter().next().unwrap();
10774 let coalesced = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
10775 expressions: vec![arg, Expression::string("")],
10776 original_name: None,
10777 }));
10778 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), vec![coalesced]))))
10779 }
10780 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
10781 "REGEXP_EXTRACT" if f.args.len() == 3
10782 && matches!(target, DialectType::BigQuery) => {
10783 // If group_index is 0, drop it
10784 let drop_group = match &f.args[2] {
10785 Expression::Literal(Literal::Number(n)) => n == "0",
10786 _ => false,
10787 };
10788 if drop_group {
10789 let mut args = f.args;
10790 args.truncate(2);
10791 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
10792 } else {
10793 Ok(Expression::Function(f))
10794 }
10795 }
10796 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
10797 "REGEXP_EXTRACT" if f.args.len() == 4
10798 && matches!(target, DialectType::Snowflake) => {
10799 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
10800 let mut args = f.args;
10801 let this = args.remove(0);
10802 let pattern = args.remove(0);
10803 let group = args.remove(0);
10804 let flags = args.remove(0);
10805 Ok(Expression::Function(Box::new(Function::new(
10806 "REGEXP_SUBSTR".to_string(),
10807 vec![this, pattern, Expression::number(1), Expression::number(1), flags, group],
10808 ))))
10809 }
10810 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
10811 "REGEXP_SUBSTR" if f.args.len() == 3
10812 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Spark | DialectType::Databricks) => {
10813 let mut args = f.args;
10814 let this = args.remove(0);
10815 let pattern = args.remove(0);
10816 let position = args.remove(0);
10817 // Wrap subject in SUBSTRING(this, position) to apply the offset
10818 let substring_expr = Expression::Function(Box::new(Function::new(
10819 "SUBSTRING".to_string(),
10820 vec![this, position],
10821 )));
10822 let target_name = match target {
10823 DialectType::DuckDB => "REGEXP_EXTRACT",
10824 _ => "REGEXP_EXTRACT",
10825 };
10826 Ok(Expression::Function(Box::new(Function::new(
10827 target_name.to_string(),
10828 vec![substring_expr, pattern],
10829 ))))
10830 }
10831 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
10832 "TO_DAYS" if f.args.len() == 1 => {
10833 let x = f.args.into_iter().next().unwrap();
10834 let epoch = Expression::string("0000-01-01");
10835 // Build the final target-specific expression directly
10836 let datediff_expr = match target {
10837 DialectType::MySQL | DialectType::SingleStore => {
10838 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
10839 Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
10840 }
10841 DialectType::DuckDB => {
10842 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
10843 let cast_epoch = Expression::Cast(Box::new(Cast {
10844 this: epoch, to: DataType::Date,
10845 trailing_comments: Vec::new(), double_colon_syntax: false,
10846 format: None, default: None,
10847 }));
10848 let cast_x = Expression::Cast(Box::new(Cast {
10849 this: x, to: DataType::Date,
10850 trailing_comments: Vec::new(), double_colon_syntax: false,
10851 format: None, default: None,
10852 }));
10853 Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
10854 Expression::string("DAY"), cast_epoch, cast_x,
10855 ])))
10856 }
10857 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10858 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
10859 let cast_epoch = Self::double_cast_timestamp_date(epoch);
10860 let cast_x = Self::double_cast_timestamp_date(x);
10861 Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
10862 Expression::string("DAY"), cast_epoch, cast_x,
10863 ])))
10864 }
10865 _ => {
10866 // Default: (DATEDIFF(x, '0000-01-01') + 1)
10867 Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
10868 }
10869 };
10870 let add_one = Expression::Add(Box::new(BinaryOp::new(datediff_expr, Expression::number(1))));
10871 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
10872 this: add_one,
10873 trailing_comments: Vec::new(),
10874 })))
10875 }
10876 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
10877 "STR_TO_DATE" if f.args.len() == 2
10878 && matches!(target, DialectType::Presto | DialectType::Trino) => {
10879 let mut args = f.args;
10880 let x = args.remove(0);
10881 let format_expr = args.remove(0);
10882 // Check if the format contains time components
10883 let has_time = if let Expression::Literal(Literal::String(ref fmt)) = format_expr {
10884 fmt.contains("%H") || fmt.contains("%T") || fmt.contains("%M") || fmt.contains("%S")
10885 || fmt.contains("%I") || fmt.contains("%p")
10886 } else {
10887 false
10888 };
10889 let date_parse = Expression::Function(Box::new(Function::new(
10890 "DATE_PARSE".to_string(),
10891 vec![x, format_expr],
10892 )));
10893 if has_time {
10894 // Has time components: just DATE_PARSE
10895 Ok(date_parse)
10896 } else {
10897 // Date-only: CAST(DATE_PARSE(...) AS DATE)
10898 Ok(Expression::Cast(Box::new(Cast {
10899 this: date_parse,
10900 to: DataType::Date,
10901 trailing_comments: Vec::new(),
10902 double_colon_syntax: false,
10903 format: None,
10904 default: None,
10905 })))
10906 }
10907 }
10908 "STR_TO_DATE" if f.args.len() == 2
10909 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
10910 let mut args = f.args;
10911 let x = args.remove(0);
10912 let fmt = args.remove(0);
10913 let pg_fmt = match fmt {
10914 Expression::Literal(Literal::String(s)) => {
10915 Expression::string(
10916 &s.replace("%Y", "YYYY")
10917 .replace("%m", "MM")
10918 .replace("%d", "DD")
10919 .replace("%H", "HH24")
10920 .replace("%M", "MI")
10921 .replace("%S", "SS")
10922 )
10923 }
10924 other => other,
10925 };
10926 let to_date = Expression::Function(Box::new(Function::new(
10927 "TO_DATE".to_string(),
10928 vec![x, pg_fmt],
10929 )));
10930 Ok(Expression::Cast(Box::new(Cast {
10931 this: to_date,
10932 to: DataType::Timestamp { timezone: false, precision: None },
10933 trailing_comments: Vec::new(),
10934 double_colon_syntax: false,
10935 format: None,
10936 default: None,
10937 })))
10938 }
10939 // RANGE(start, end) -> GENERATE_SERIES for SQLite
10940 "RANGE" if (f.args.len() == 1 || f.args.len() == 2)
10941 && matches!(target, DialectType::SQLite) => {
10942 if f.args.len() == 2 {
10943 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
10944 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
10945 let mut args = f.args;
10946 let start = args.remove(0);
10947 let end = args.remove(0);
10948 Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), vec![start, end]))))
10949 } else {
10950 Ok(Expression::Function(f))
10951 }
10952 }
10953 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
10954 // When source is Snowflake, keep as-is (args already in correct form)
10955 "UNIFORM" if matches!(target, DialectType::Snowflake) && (f.args.len() == 2 || f.args.len() == 3) => {
10956 if matches!(source, DialectType::Snowflake) {
10957 // Snowflake -> Snowflake: keep as-is
10958 Ok(Expression::Function(f))
10959 } else {
10960 let mut args = f.args;
10961 let low = args.remove(0);
10962 let high = args.remove(0);
10963 let random = if !args.is_empty() {
10964 let seed = args.remove(0);
10965 Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![seed])))
10966 } else {
10967 Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![])))
10968 };
10969 Ok(Expression::Function(Box::new(Function::new("UNIFORM".to_string(), vec![low, high, random]))))
10970 }
10971 }
10972 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
10973 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
10974 let mut args = f.args;
10975 let ts_arg = args.remove(0);
10976 let tz_arg = args.remove(0);
10977 // Cast string literal to TIMESTAMP for all targets
10978 let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
10979 Expression::Cast(Box::new(Cast {
10980 this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
10981 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
10982 }))
10983 } else { ts_arg };
10984 match target {
10985 DialectType::Spark | DialectType::Databricks => {
10986 Ok(Expression::Function(Box::new(Function::new(
10987 "TO_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
10988 ))))
10989 }
10990 DialectType::Snowflake => {
10991 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
10992 Ok(Expression::Function(Box::new(Function::new(
10993 "CONVERT_TIMEZONE".to_string(), vec![tz_arg, Expression::string("UTC"), ts_cast],
10994 ))))
10995 }
10996 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10997 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
10998 let wtz = Expression::Function(Box::new(Function::new(
10999 "WITH_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
11000 )));
11001 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11002 this: wtz, zone: Expression::string("UTC"),
11003 })))
11004 }
11005 DialectType::BigQuery => {
11006 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
11007 let cast_dt = Expression::Cast(Box::new(Cast {
11008 this: if let Expression::Cast(c) = ts_cast { c.this } else { ts_cast.clone() },
11009 to: DataType::Custom { name: "DATETIME".to_string() },
11010 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11011 }));
11012 let ts_func = Expression::Function(Box::new(Function::new(
11013 "TIMESTAMP".to_string(), vec![cast_dt, tz_arg],
11014 )));
11015 Ok(Expression::Function(Box::new(Function::new(
11016 "DATETIME".to_string(), vec![ts_func, Expression::string("UTC")],
11017 ))))
11018 }
11019 _ => {
11020 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
11021 let atz1 = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11022 this: ts_cast, zone: tz_arg,
11023 }));
11024 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11025 this: atz1, zone: Expression::string("UTC"),
11026 })))
11027 }
11028 }
11029 }
11030 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
11031 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
11032 let mut args = f.args;
11033 let ts_arg = args.remove(0);
11034 let tz_arg = args.remove(0);
11035 // Cast string literal to TIMESTAMP
11036 let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
11037 Expression::Cast(Box::new(Cast {
11038 this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
11039 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11040 }))
11041 } else { ts_arg };
11042 match target {
11043 DialectType::Spark | DialectType::Databricks => {
11044 Ok(Expression::Function(Box::new(Function::new(
11045 "FROM_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
11046 ))))
11047 }
11048 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11049 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
11050 Ok(Expression::Function(Box::new(Function::new(
11051 "AT_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
11052 ))))
11053 }
11054 DialectType::Snowflake => {
11055 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
11056 Ok(Expression::Function(Box::new(Function::new(
11057 "CONVERT_TIMEZONE".to_string(), vec![Expression::string("UTC"), tz_arg, ts_cast],
11058 ))))
11059 }
11060 _ => {
11061 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
11062 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11063 this: ts_cast, zone: tz_arg,
11064 })))
11065 }
11066 }
11067 }
11068 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
11069 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
11070 let name = match target {
11071 DialectType::Snowflake => "OBJECT_CONSTRUCT",
11072 _ => "MAP",
11073 };
11074 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11075 }
11076 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
11077 "STR_TO_MAP" if f.args.len() >= 1 => {
11078 match target {
11079 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11080 Ok(Expression::Function(Box::new(Function::new("SPLIT_TO_MAP".to_string(), f.args))))
11081 }
11082 _ => Ok(Expression::Function(f)),
11083 }
11084 }
11085 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
11086 "TIME_TO_STR" if f.args.len() == 2 => {
11087 let mut args = f.args;
11088 let this = args.remove(0);
11089 let fmt_expr = args.remove(0);
11090 let format = if let Expression::Literal(Literal::String(s)) = fmt_expr {
11091 s
11092 } else {
11093 "%Y-%m-%d %H:%M:%S".to_string()
11094 };
11095 Ok(Expression::TimeToStr(Box::new(crate::expressions::TimeToStr {
11096 this: Box::new(this),
11097 format,
11098 culture: None,
11099 zone: None,
11100 })))
11101 }
11102 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
11103 "MONTHS_BETWEEN" if f.args.len() == 2 => {
11104 match target {
11105 DialectType::DuckDB => {
11106 let mut args = f.args;
11107 let end_date = args.remove(0);
11108 let start_date = args.remove(0);
11109 let cast_end = Self::ensure_cast_date(end_date);
11110 let cast_start = Self::ensure_cast_date(start_date);
11111 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
11112 let dd = Expression::Function(Box::new(Function::new(
11113 "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
11114 )));
11115 let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
11116 let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
11117 let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
11118 let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
11119 let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
11120 let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
11121 let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
11122 let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
11123 let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
11124 let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
11125 let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
11126 this: day_diff, trailing_comments: Vec::new(),
11127 }));
11128 let frac = Expression::Div(Box::new(BinaryOp::new(
11129 day_diff_paren,
11130 Expression::Literal(Literal::Number("31.0".to_string())),
11131 )));
11132 let case_expr = Expression::Case(Box::new(Case {
11133 operand: None,
11134 whens: vec![(both_cond, Expression::number(0))],
11135 else_: Some(frac),
11136 }));
11137 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
11138 }
11139 DialectType::Snowflake | DialectType::Redshift => {
11140 let mut args = f.args;
11141 let end_date = args.remove(0);
11142 let start_date = args.remove(0);
11143 let unit = Expression::Identifier(Identifier::new("MONTH"));
11144 Ok(Expression::Function(Box::new(Function::new(
11145 "DATEDIFF".to_string(), vec![unit, start_date, end_date],
11146 ))))
11147 }
11148 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11149 let mut args = f.args;
11150 let end_date = args.remove(0);
11151 let start_date = args.remove(0);
11152 Ok(Expression::Function(Box::new(Function::new(
11153 "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
11154 ))))
11155 }
11156 _ => Ok(Expression::Function(f)),
11157 }
11158 }
11159 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
11160 // Drop the roundOff arg for non-Spark targets, keep it for Spark
11161 "MONTHS_BETWEEN" if f.args.len() == 3 => {
11162 match target {
11163 DialectType::Spark | DialectType::Databricks => {
11164 Ok(Expression::Function(f))
11165 }
11166 _ => {
11167 // Drop the 3rd arg and delegate to the 2-arg logic
11168 let mut args = f.args;
11169 let end_date = args.remove(0);
11170 let start_date = args.remove(0);
11171 // Re-create as 2-arg and process
11172 let f2 = Function::new("MONTHS_BETWEEN".to_string(), vec![end_date, start_date]);
11173 let e2 = Expression::Function(Box::new(f2));
11174 Self::cross_dialect_normalize(e2, source, target)
11175 }
11176 }
11177 }
11178 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
11179 "TO_TIMESTAMP" if f.args.len() == 1
11180 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
11181 let arg = f.args.into_iter().next().unwrap();
11182 Ok(Expression::Cast(Box::new(Cast {
11183 this: arg, to: DataType::Timestamp { timezone: false, precision: None },
11184 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11185 })))
11186 }
11187 // STRING(x) -> CAST(x AS STRING) for Spark target
11188 "STRING" if f.args.len() == 1
11189 && matches!(source, DialectType::Spark | DialectType::Databricks) => {
11190 let arg = f.args.into_iter().next().unwrap();
11191 let dt = match target {
11192 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
11193 DataType::Custom { name: "STRING".to_string() }
11194 }
11195 _ => DataType::Text,
11196 };
11197 Ok(Expression::Cast(Box::new(Cast {
11198 this: arg, to: dt,
11199 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11200 })))
11201 }
11202 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
11203 "LOGICAL_OR" if f.args.len() == 1 => {
11204 let name = match target {
11205 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
11206 _ => "LOGICAL_OR",
11207 };
11208 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11209 }
11210 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
11211 "SPLIT" if f.args.len() == 2
11212 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
11213 let name = match target {
11214 DialectType::DuckDB => "STR_SPLIT_REGEX",
11215 DialectType::Presto | DialectType::Trino | DialectType::Athena => "REGEXP_SPLIT",
11216 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
11217 _ => "SPLIT",
11218 };
11219 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11220 }
11221 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
11222 "TRY_ELEMENT_AT" if f.args.len() == 2 => {
11223 match target {
11224 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11225 Ok(Expression::Function(Box::new(Function::new("ELEMENT_AT".to_string(), f.args))))
11226 }
11227 DialectType::DuckDB => {
11228 let mut args = f.args;
11229 let arr = args.remove(0);
11230 let idx = args.remove(0);
11231 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
11232 this: arr,
11233 index: idx,
11234 })))
11235 }
11236 _ => Ok(Expression::Function(f)),
11237 }
11238 }
11239 _ => Ok(Expression::Function(f)),
11240 }
11241 } else if let Expression::AggregateFunction(mut af) = e {
11242 let name = af.name.to_uppercase();
11243 match name.as_str() {
11244 "ARBITRARY" if af.args.len() == 1 => {
11245 let arg = af.args.into_iter().next().unwrap();
11246 Ok(convert_arbitrary(arg, target))
11247 }
11248 "JSON_ARRAYAGG" => {
11249 match target {
11250 DialectType::PostgreSQL => {
11251 af.name = "JSON_AGG".to_string();
11252 // Add NULLS FIRST to ORDER BY items for PostgreSQL
11253 for ordered in af.order_by.iter_mut() {
11254 if ordered.nulls_first.is_none() {
11255 ordered.nulls_first = Some(true);
11256 }
11257 }
11258 Ok(Expression::AggregateFunction(af))
11259 }
11260 _ => Ok(Expression::AggregateFunction(af)),
11261 }
11262 }
11263 _ => Ok(Expression::AggregateFunction(af)),
11264 }
11265 } else if let Expression::JSONArrayAgg(ja) = e {
11266 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
11267 match target {
11268 DialectType::PostgreSQL => {
11269 let mut order_by = Vec::new();
11270 if let Some(order_expr) = ja.order {
11271 if let Expression::OrderBy(ob) = *order_expr {
11272 for mut ordered in ob.expressions {
11273 if ordered.nulls_first.is_none() {
11274 ordered.nulls_first = Some(true);
11275 }
11276 order_by.push(ordered);
11277 }
11278 }
11279 }
11280 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
11281 name: "JSON_AGG".to_string(),
11282 args: vec![*ja.this],
11283 distinct: false,
11284 filter: None,
11285 order_by,
11286 limit: None,
11287 ignore_nulls: None,
11288 })))
11289 }
11290 _ => Ok(Expression::JSONArrayAgg(ja)),
11291 }
11292 } else if let Expression::ToNumber(tn) = e {
11293 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
11294 let arg = *tn.this;
11295 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11296 this: arg,
11297 to: crate::expressions::DataType::Double { precision: None, scale: None },
11298 double_colon_syntax: false,
11299 trailing_comments: Vec::new(),
11300 format: None,
11301 default: None,
11302 })))
11303 } else {
11304 Ok(e)
11305 }
11306 }
11307
11308 Action::RegexpLikeToDuckDB => {
11309 if let Expression::RegexpLike(f) = e {
11310 let mut args = vec![f.this, f.pattern];
11311 if let Some(flags) = f.flags {
11312 args.push(flags);
11313 }
11314 Ok(Expression::Function(Box::new(Function::new(
11315 "REGEXP_MATCHES".to_string(),
11316 args,
11317 ))))
11318 } else {
11319 Ok(e)
11320 }
11321 }
11322 Action::EpochConvert => {
11323 if let Expression::Epoch(f) = e {
11324 let arg = f.this;
11325 let name = match target {
11326 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
11327 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
11328 DialectType::BigQuery => "TIME_TO_UNIX",
11329 _ => "EPOCH",
11330 };
11331 Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arg]))))
11332 } else {
11333 Ok(e)
11334 }
11335 }
Action::EpochMsConvert => {
    // EPOCH_MS(x): interpret x as milliseconds since the Unix epoch and
    // produce a timestamp, using each target engine's native construct.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                // Spark has a direct milliseconds constructor.
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::BigQuery => {
                // BigQuery uses the same TIMESTAMP_MILLIS spelling.
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                // The DOUBLE cast keeps the fractional (sub-second) part of
                // the division instead of truncating to整 integer seconds.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POW".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3))
                // MySQL's `/` is already non-truncating, so no cast is needed.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                // DOUBLE PRECISION is the Postgres spelling of a float8; the
                // cast avoids integer division losing the milliseconds.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![div]))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                // NOTE(review): the Nullable(Int64) cast presumably matches
                // fromUnixTimestamp64Milli's expected argument type — confirm
                // against the ClickHouse docs.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "Nullable(Int64)".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("fromUnixTimestamp64Milli".to_string(), vec![cast_arg]))))
            }
            // Default: keep DuckDB's EPOCH_MS spelling as a plain function call.
            _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![arg])))),
        }
    } else {
        Ok(e)
    }
}
Action::TSQLTypeNormalize => {
    // Map TSQL-specific data types (MONEY, DATETIME2, UNIQUEIDENTIFIER, ...)
    // onto portable equivalents, with a few target-sensitive choices.
    // Any type not matched below is returned unchanged.
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY is a fixed-point type; DECIMAL(15,4) preserves its scale.
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal { precision: Some(15), scale: Some(4) }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("SMALLMONEY") => {
                DataType::Decimal { precision: Some(6), scale: Some(4) }
            }
            // DATETIME2 (no precision) -> plain TIMESTAMP.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp { timezone: false, precision: None }
            }
            // REAL (parsed as a custom type or with the real_spelling flag)
            // is normalized to FLOAT.
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            DataType::Float { real_spelling: true, .. } => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom { name: "BLOB".to_string() }
            }
            // TSQL BIT is a 0/1 flag; BOOLEAN is the portable equivalent.
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("ROWVERSION") => {
                DataType::Custom { name: "BINARY".to_string() }
            }
            // UNIQUEIDENTIFIER: Spark-family has no UUID type, use STRING;
            // elsewhere a VARCHAR(36) holds the canonical textual form.
            DataType::Custom { name } if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Custom { name: "STRING".to_string() }
                    }
                    _ => DataType::VarChar { length: Some(36), parenthesized_length: true },
                }
            }
            // DATETIMEOFFSET: Spark-family timestamps carry no offset, so the
            // timezone flag is dropped there; others keep TIMESTAMP WITH TZ.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIMEOFFSET") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => DataType::Timestamp { timezone: true, precision: None },
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("DATETIME2(") => {
                // DATETIME2(n) -> TIMESTAMP (the precision argument is dropped)
                DataType::Timestamp { timezone: false, precision: None }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("TIME(") => {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("NUMERIC") => {
                // Parse NUMERIC(p,s) back to Decimal(p,s)
                let upper = name.to_uppercase();
                if let Some(inner) = upper.strip_prefix("NUMERIC(").and_then(|s| s.strip_suffix(')')) {
                    let parts: Vec<&str> = inner.split(',').collect();
                    let precision = parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale = parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal { precision: None, scale: None }
                } else {
                    // Anything else that merely starts with NUMERIC (e.g. a
                    // malformed spelling) is left untouched.
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float { precision: Some(p), .. } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float { precision: None, scale: None, real_spelling: false }
                } else {
                    DataType::Double { precision: None, scale: None }
                }
            }
            // TSQL TINYINT is unsigned (0..255): DuckDB has UTINYINT; the
            // Spark family's TINYINT is signed, so widen to SMALLINT there.
            DataType::TinyInt { .. } => {
                match target {
                    DialectType::DuckDB => DataType::Custom { name: "UTINYINT".to_string() },
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => DataType::SmallInt { length: None },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            // INTEGER -> INT: drop the long spelling (applies to all targets).
            DataType::Int { length, integer_spelling: true } => {
                DataType::Int { length: *length, integer_spelling: false }
            }
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
Action::MySQLSafeDivide => {
    // Emulate MySQL's division semantics on other engines: MySQL returns
    // NULL for division by zero, so the divisor is wrapped in NULLIF(y, 0);
    // targets whose `/` truncates integers additionally get the dividend
    // cast to a floating-point type.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        // (SQLite already yields NULL on division by zero).
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float { precision: None, scale: None, real_spelling: true },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0)
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side so the division is
        // performed in floating point rather than integer arithmetic.
        let new_left = match target {
            DialectType::PostgreSQL | DialectType::Redshift | DialectType::Teradata => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::Drill | DialectType::Trino | DialectType::Presto => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::TSQL => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Float { precision: None, scale: None, real_spelling: false },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            // Remaining targets divide in non-truncating arithmetic already;
            // leave the dividend as-is.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(new_left, nullif_right))))
    } else {
        Ok(e)
    }
}
11567 Action::AlterTableRenameStripSchema => {
11568 if let Expression::AlterTable(mut at) = e {
11569 if let Some(crate::expressions::AlterTableAction::RenameTable(ref mut new_tbl)) = at.actions.first_mut() {
11570 new_tbl.schema = None;
11571 new_tbl.catalog = None;
11572 }
11573 Ok(Expression::AlterTable(at))
11574 } else {
11575 Ok(e)
11576 }
11577 }
Action::NullsOrdering => {
    // Fill in the source dialect's implied null ordering default.
    // This makes implicit null ordering explicit so the target generator
    // can correctly strip or keep it.
    //
    // Dialect null ordering categories (as encoded by the matches! below):
    //   nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
    //     ASC -> NULLS LAST, DESC -> NULLS FIRST
    //   nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, etc. —
    //   the fall-through category):
    //     ASC -> NULLS FIRST, DESC -> NULLS LAST
    //   nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena, ClickHouse,
    //   Drill, Exasol):
    //     NULLS LAST always (both ASC and DESC)
    if let Expression::Ordered(mut o) = e {
        let is_asc = !o.desc;

        let is_source_nulls_large = matches!(source,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_source_nulls_last = matches!(source,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol
        );

        // Determine target category to check if default matches
        let is_target_nulls_large = matches!(target,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_target_nulls_last = matches!(target,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol
        );

        // Compute the implied nulls_first for source
        let source_nulls_first = if is_source_nulls_large {
            !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
        } else if is_source_nulls_last {
            false // NULLS LAST always
        } else {
            is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
        };

        // Compute the target's default under the same three-way scheme
        let target_nulls_first = if is_target_nulls_large {
            !is_asc
        } else if is_target_nulls_last {
            false
        } else {
            is_asc
        };

        // Only add explicit nulls ordering if source and target defaults differ
        if source_nulls_first != target_nulls_first {
            o.nulls_first = Some(source_nulls_first);
        }
        // If they match, leave nulls_first as None so the generator won't output it

        Ok(Expression::Ordered(o))
    } else {
        Ok(e)
    }
}
Action::StringAggConvert => {
    // Normalize STRING_AGG — both the bare call and the
    // WITHIN GROUP (ORDER BY ...) form — into the target dialect's
    // aggregate string-concatenation construct.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            // (the parser may have produced any of the three shapes).
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af) if af.name.eq_ignore_ascii_case("STRING_AGG") && af.args.len() >= 2 => {
                    (Some(af.args[0].clone()), Some(af.args[1].clone()), af.distinct)
                }
                Expression::Function(ref f) if f.name.eq_ignore_ascii_case("STRING_AGG") && f.args.len() >= 2 => {
                    // Plain Function nodes carry no distinct flag.
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                            this: Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None, // order_by goes in WithinGroup, not StringAgg
                                distinct,
                                filter: None,
                                limit: None,
                            })),
                            order_by,
                        })))
                    }
                    DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support,
                        // so the ordering is intentionally dropped here.
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: None,
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                }
            } else {
                // Not a STRING_AGG-shaped aggregate: pass WITHIN GROUP through.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            // Bare STRING_AGG(x, sep) without WITHIN GROUP.
            match target {
                DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                        this: sa.this,
                        separator: sa.separator,
                        on_overflow: None,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: None,
                    })))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
11764 Action::GroupConcatConvert => {
11765 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
11766 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
11767 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
11768 if let Expression::Function(ref f) = expr {
11769 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11770 let mut result = f.args[0].clone();
11771 for arg in &f.args[1..] {
11772 result = Expression::Concat(Box::new(BinaryOp {
11773 left: result,
11774 right: arg.clone(),
11775 left_comments: vec![],
11776 operator_comments: vec![],
11777 trailing_comments: vec![],
11778 }));
11779 }
11780 return result;
11781 }
11782 }
11783 expr
11784 }
11785 fn expand_concat_to_plus(expr: Expression) -> Expression {
11786 if let Expression::Function(ref f) = expr {
11787 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11788 let mut result = f.args[0].clone();
11789 for arg in &f.args[1..] {
11790 result = Expression::Add(Box::new(BinaryOp {
11791 left: result,
11792 right: arg.clone(),
11793 left_comments: vec![],
11794 operator_comments: vec![],
11795 trailing_comments: vec![],
11796 }));
11797 }
11798 return result;
11799 }
11800 }
11801 expr
11802 }
11803 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
11804 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
11805 if let Expression::Function(ref f) = expr {
11806 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11807 let new_args: Vec<Expression> = f.args.iter().map(|arg| {
11808 Expression::Cast(Box::new(crate::expressions::Cast {
11809 this: arg.clone(),
11810 to: crate::expressions::DataType::VarChar { length: None, parenthesized_length: false },
11811 trailing_comments: Vec::new(),
11812 double_colon_syntax: false,
11813 format: None,
11814 default: None,
11815 }))
11816 }).collect();
11817 return Expression::Function(Box::new(crate::expressions::Function::new(
11818 "CONCAT".to_string(),
11819 new_args,
11820 )));
11821 }
11822 }
11823 expr
11824 }
11825 if let Expression::GroupConcat(gc) = e {
11826 match target {
11827 DialectType::Presto => {
11828 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
11829 let sep = gc.separator.unwrap_or(Expression::string(","));
11830 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
11831 let this = wrap_concat_args_in_varchar_cast(gc.this);
11832 let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
11833 this,
11834 distinct: gc.distinct,
11835 filter: gc.filter,
11836 order_by: gc.order_by.unwrap_or_default(),
11837 name: None,
11838 ignore_nulls: None,
11839 having_max: None,
11840 limit: None,
11841 }));
11842 Ok(Expression::ArrayJoin(Box::new(crate::expressions::ArrayJoinFunc {
11843 this: array_agg,
11844 separator: sep,
11845 null_replacement: None,
11846 })))
11847 }
11848 DialectType::Trino => {
11849 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
11850 let sep = gc.separator.unwrap_or(Expression::string(","));
11851 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
11852 let this = wrap_concat_args_in_varchar_cast(gc.this);
11853 Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
11854 this,
11855 separator: Some(sep),
11856 on_overflow: None,
11857 order_by: gc.order_by,
11858 distinct: gc.distinct,
11859 filter: gc.filter,
11860 })))
11861 }
11862 DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake
11863 | DialectType::DuckDB
11864 | DialectType::Hive | DialectType::ClickHouse => {
11865 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
11866 let sep = gc.separator.unwrap_or(Expression::string(","));
11867 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
11868 let this = expand_concat_to_dpipe(gc.this);
11869 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
11870 let order_by = if target == DialectType::PostgreSQL {
11871 gc.order_by.map(|ords| {
11872 ords.into_iter().map(|mut o| {
11873 if o.nulls_first.is_none() {
11874 if o.desc {
11875 o.nulls_first = Some(false); // NULLS LAST
11876 } else {
11877 o.nulls_first = Some(true); // NULLS FIRST
11878 }
11879 }
11880 o
11881 }).collect()
11882 })
11883 } else {
11884 gc.order_by
11885 };
11886 Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
11887 this,
11888 separator: Some(sep),
11889 order_by,
11890 distinct: gc.distinct,
11891 filter: gc.filter,
11892 limit: None,
11893 })))
11894 }
11895 DialectType::TSQL => {
11896 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
11897 // TSQL doesn't support DISTINCT in STRING_AGG
11898 let sep = gc.separator.unwrap_or(Expression::string(","));
11899 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
11900 let this = expand_concat_to_plus(gc.this);
11901 Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
11902 this,
11903 separator: Some(sep),
11904 order_by: gc.order_by,
11905 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
11906 filter: gc.filter,
11907 limit: None,
11908 })))
11909 }
11910 DialectType::SQLite => {
11911 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
11912 // SQLite GROUP_CONCAT doesn't support ORDER BY
11913 // Expand CONCAT(a,b,c) -> a || b || c
11914 let this = expand_concat_to_dpipe(gc.this);
11915 Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
11916 this,
11917 separator: gc.separator,
11918 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
11919 distinct: gc.distinct,
11920 filter: gc.filter,
11921 })))
11922 }
11923 DialectType::Spark | DialectType::Databricks => {
11924 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
11925 let sep = gc.separator.unwrap_or(Expression::string(","));
11926 Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
11927 this: gc.this,
11928 separator: Some(sep),
11929 on_overflow: None,
11930 order_by: gc.order_by,
11931 distinct: gc.distinct,
11932 filter: None,
11933 })))
11934 }
11935 DialectType::MySQL | DialectType::SingleStore | DialectType::StarRocks => {
11936 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
11937 if gc.separator.is_none() {
11938 let mut gc = gc;
11939 gc.separator = Some(Expression::string(","));
11940 Ok(Expression::GroupConcat(gc))
11941 } else {
11942 Ok(Expression::GroupConcat(gc))
11943 }
11944 }
11945 _ => Ok(Expression::GroupConcat(gc)),
11946 }
11947 } else {
11948 Ok(e)
11949 }
11950 }
11951 Action::TempTableHash => {
11952 match e {
11953 Expression::CreateTable(mut ct) => {
11954 // TSQL #table -> TEMPORARY TABLE with # stripped from name
11955 let name = &ct.name.name.name;
11956 if name.starts_with('#') {
11957 ct.name.name.name = name.trim_start_matches('#').to_string();
11958 }
11959 // Set temporary flag
11960 ct.temporary = true;
11961 Ok(Expression::CreateTable(ct))
11962 }
11963 Expression::Table(mut tr) => {
11964 // Strip # from table references
11965 let name = &tr.name.name;
11966 if name.starts_with('#') {
11967 tr.name.name = name.trim_start_matches('#').to_string();
11968 }
11969 Ok(Expression::Table(tr))
11970 }
11971 Expression::DropTable(mut dt) => {
11972 // Strip # from DROP TABLE names
11973 for table_ref in &mut dt.names {
11974 if table_ref.name.name.starts_with('#') {
11975 table_ref.name.name = table_ref.name.name.trim_start_matches('#').to_string();
11976 }
11977 }
11978 Ok(Expression::DropTable(dt))
11979 }
11980 _ => Ok(e),
11981 }
11982 }
11983 Action::NvlClearOriginal => {
11984 if let Expression::Nvl(mut f) = e {
11985 f.original_name = None;
11986 Ok(Expression::Nvl(f))
11987 } else {
11988 Ok(e)
11989 }
11990 }
11991 Action::HiveCastToTryCast => {
11992 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
11993 if let Expression::Cast(mut c) = e {
11994 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
11995 // (Spark's TIMESTAMP is always timezone-aware)
11996 if matches!(target, DialectType::DuckDB)
11997 && matches!(source, DialectType::Spark | DialectType::Databricks)
11998 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
11999 {
12000 c.to = DataType::Custom { name: "TIMESTAMPTZ".to_string() };
12001 }
12002 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
12003 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
12004 if matches!(target, DialectType::Databricks | DialectType::Spark)
12005 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
12006 && Self::has_varchar_char_type(&c.to)
12007 {
12008 c.to = Self::normalize_varchar_to_string(c.to);
12009 }
12010 Ok(Expression::TryCast(c))
12011 } else {
12012 Ok(e)
12013 }
12014 }
Action::XorExpand => {
    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
    // Snowflake: use BOOLXOR(a, b) instead
    if let Expression::Xor(xor) = e {
        // Collect all XOR operands — the node stores up to two named slots
        // plus an overflow list for n-ary chains.
        let mut operands = Vec::new();
        if let Some(this) = xor.this {
            operands.push(*this);
        }
        if let Some(expr) = xor.expression {
            operands.push(*expr);
        }
        operands.extend(xor.expressions);

        // Snowflake: use BOOLXOR(a, b)
        // (only for the exact binary case; n-ary XOR falls through to the
        // generic boolean expansion below)
        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
            let a = operands.remove(0);
            let b = operands.remove(0);
            return Ok(Expression::Function(Box::new(Function::new("BOOLXOR".to_string(), vec![a, b]))));
        }

        // Helper to build (a AND NOT b) OR (NOT a AND b).
        // Each operand appears twice in the expansion, hence the clones.
        let make_xor = |a: Expression, b: Expression| -> Expression {
            let not_b = Expression::Not(Box::new(crate::expressions::UnaryOp::new(b.clone())));
            let not_a = Expression::Not(Box::new(crate::expressions::UnaryOp::new(a.clone())));
            let left_and = Expression::And(Box::new(BinaryOp {
                left: a,
                right: Expression::Paren(Box::new(Paren { this: not_b, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            let right_and = Expression::And(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: not_a, trailing_comments: Vec::new() })),
                right: b,
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            Expression::Or(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: left_and, trailing_comments: Vec::new() })),
                right: Expression::Paren(Box::new(Paren { this: right_and, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }))
        };

        if operands.len() >= 2 {
            // Left-fold the chain: a XOR b XOR c => xor(xor(a, b), c).
            let mut result = make_xor(operands.remove(0), operands.remove(0));
            for operand in operands {
                result = make_xor(result, operand);
            }
            Ok(result)
        } else if operands.len() == 1 {
            // Degenerate single-operand XOR: pass the operand through.
            Ok(operands.remove(0))
        } else {
            // No operands - return FALSE (shouldn't happen)
            Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: false }))
        }
    } else {
        Ok(e)
    }
}
12073 Action::DatePartUnquote => {
12074 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
12075 // Convert the quoted string first arg to a bare Column/Identifier
12076 if let Expression::Function(mut f) = e {
12077 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) = f.args.first() {
12078 let bare_name = s.to_lowercase();
12079 f.args[0] = Expression::Column(crate::expressions::Column {
12080 name: Identifier::new(bare_name),
12081 table: None,
12082 join_mark: false,
12083 trailing_comments: Vec::new(),
12084 });
12085 }
12086 Ok(Expression::Function(f))
12087 } else {
12088 Ok(e)
12089 }
12090 }
12091 Action::ArrayLengthConvert => {
12092 // Extract the argument from the expression
12093 let arg = match e {
12094 Expression::Cardinality(ref f) => f.this.clone(),
12095 Expression::ArrayLength(ref f) => f.this.clone(),
12096 Expression::ArraySize(ref f) => f.this.clone(),
12097 _ => return Ok(e),
12098 };
12099 match target {
12100 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
12101 Ok(Expression::Function(Box::new(Function::new("SIZE".to_string(), vec![arg]))))
12102 }
12103 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12104 Ok(Expression::Cardinality(Box::new(crate::expressions::UnaryFunc::new(arg))))
12105 }
12106 DialectType::BigQuery => {
12107 Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
12108 }
12109 DialectType::DuckDB => {
12110 Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
12111 }
12112 DialectType::PostgreSQL | DialectType::Redshift => {
12113 // PostgreSQL ARRAY_LENGTH requires dimension arg
12114 Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), vec![arg, Expression::number(1)]))))
12115 }
12116 _ => Ok(e), // Keep original
12117 }
12118 }
12119
12120 Action::JsonExtractToArrow => {
12121 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
12122 if let Expression::JsonExtract(mut f) = e {
12123 f.arrow_syntax = true;
12124 Ok(Expression::JsonExtract(f))
12125 } else {
12126 Ok(e)
12127 }
12128 }
12129
12130 Action::JsonExtractToGetJsonObject => {
12131 if let Expression::JsonExtract(f) = e {
12132 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
12133 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
12134 // Convert JSONPath to individual keys
12135 let extracted_keys: Option<Vec<String>> = if let Expression::Literal(Literal::String(ref s)) = f.path {
12136 s.strip_prefix("$.").map(|stripped| {
12137 stripped.split('.').map(|k| k.to_string()).collect()
12138 })
12139 } else {
12140 None
12141 };
12142 let keys = if let Some(key_list) = extracted_keys {
12143 key_list.into_iter().map(|k| Expression::string(&k)).collect::<Vec<_>>()
12144 } else {
12145 vec![f.path]
12146 };
12147 let mut args = vec![f.this];
12148 args.extend(keys);
12149 Ok(Expression::Function(Box::new(Function::new(
12150 "JSON_EXTRACT_PATH".to_string(),
12151 args,
12152 ))))
12153 } else {
12154 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
12155 Ok(Expression::Function(Box::new(Function::new(
12156 "GET_JSON_OBJECT".to_string(),
12157 vec![f.this, f.path],
12158 ))))
12159 }
12160 } else {
12161 Ok(e)
12162 }
12163 }
12164
12165 Action::JsonExtractScalarToGetJsonObject => {
12166 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
12167 if let Expression::JsonExtractScalar(f) = e {
12168 Ok(Expression::Function(Box::new(Function::new(
12169 "GET_JSON_OBJECT".to_string(),
12170 vec![f.this, f.path],
12171 ))))
12172 } else {
12173 Ok(e)
12174 }
12175 }
12176
Action::JsonQueryValueConvert => {
    // JsonQuery/JsonValue -> target-specific JSON access construct.
    // The two node kinds share the same (this, path) shape, so they are
    // unified here and only the default branch distinguishes them.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // JSON_QUERY handles objects/arrays, JSON_VALUE handles scalars;
            // combining them covers both cases regardless of the value kind.
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: f.this,
                path: f.path,
                returning: f.returning,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: f.wrapper_option,
                quotes_option: f.quotes_option,
                on_scalar_string: f.on_scalar_string,
                on_error: f.on_error,
            })))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") => {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => {
                    Expression::Function(Box::new(Function::new(
                        "PARSE_JSON".to_string(), vec![f.this],
                    )))
                }
            };
            // GET_PATH takes a bare path ('a.b'), not a JSONPath ('$.a.b').
            let path_str = match &f.path {
                Expression::Literal(Literal::String(s)) => {
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Literal::String(stripped.to_string()))
                }
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(), vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
12265
12266 Action::JsonLiteralToJsonParse => {
12267 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
12268 if let Expression::Cast(c) = e {
12269 let func_name = if matches!(target, DialectType::Snowflake) {
12270 "PARSE_JSON"
12271 } else {
12272 "JSON_PARSE"
12273 };
12274 Ok(Expression::Function(Box::new(Function::new(
12275 func_name.to_string(),
12276 vec![c.this],
12277 ))))
12278 } else {
12279 Ok(e)
12280 }
12281 }
12282
Action::AtTimeZoneConvert => {
    // AT TIME ZONE -> target-specific conversion
    if let Expression::AtTimeZone(atz) = e {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                // NOTE(review): FROM_UTC_TIMESTAMP treats its input as UTC —
                // confirm this matches AT TIME ZONE semantics for all sources.
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UTC_TIMESTAMP".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Snowflake => {
                // CONVERT_TIMEZONE('zone', expr) — note the reversed argument order.
                Ok(Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![atz.zone, atz.this],
                ))))
            }
            DialectType::BigQuery => {
                // TIMESTAMP(DATETIME(expr, 'zone'))
                let datetime_call = Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![atz.this, atz.zone],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![datetime_call],
                ))))
            }
            _ => {
                // Fallback: Presto-style AT_TIMEZONE spelling.
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
        }
    } else {
        Ok(e)
    }
}
12328
Action::DayOfWeekConvert => {
    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
    if let Expression::DayOfWeek(f) = e {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ISODOW".to_string(),
                    vec![f.this],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                // ((DAYOFWEEK(x) % 7) + 1) — presumably shifts Spark's
                // DAYOFWEEK numbering onto DAY_OF_WEEK's; confirm against
                // the source dialect's convention.
                let dayofweek = Expression::Function(Box::new(Function::new(
                    "DAYOFWEEK".to_string(),
                    vec![f.this],
                )));
                let modulo = Expression::Mod(Box::new(BinaryOp {
                    left: dayofweek,
                    right: Expression::number(7),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                // Explicit parens so operator precedence survives regeneration.
                let paren_mod = Expression::Paren(Box::new(Paren {
                    this: modulo,
                    trailing_comments: Vec::new(),
                }));
                let add_one = Expression::Add(Box::new(BinaryOp {
                    left: paren_mod,
                    right: Expression::number(1),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                Ok(Expression::Paren(Box::new(Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // All other targets keep the DayOfWeek node untouched.
            _ => Ok(Expression::DayOfWeek(f)),
        }
    } else {
        Ok(e)
    }
}
12374
Action::MaxByMinByConvert => {
    // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
    // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
    // Handle both Expression::Function and Expression::AggregateFunction
    // NOTE(review): any function name other than MAX_BY is treated as MIN_BY
    // here — presumably the action dispatch only routes MAX_BY/MIN_BY calls
    // to this arm; confirm.
    let (is_max, args) = match &e {
        Expression::Function(f) => (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone()),
        Expression::AggregateFunction(af) => (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone()),
        _ => return Ok(e),
    };
    match target {
        DialectType::ClickHouse => {
            let name = if is_max { "argMax" } else { "argMin" };
            // Keep only (value, key); any 3rd argument is dropped.
            let mut args = args;
            args.truncate(2);
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::DuckDB => {
            // DuckDB keeps the full argument list.
            let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's MAX_BY/MIN_BY take exactly two arguments.
            let mut args = args;
            args.truncate(2);
            let name = if is_max { "MAX_BY" } else { "MIN_BY" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        _ => Ok(e),
    }
}
12404
12405 Action::ElementAtConvert => {
12406 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
12407 let (arr, idx) = if let Expression::ElementAt(bf) = e {
12408 (bf.this, bf.expression)
12409 } else if let Expression::Function(ref f) = e {
12410 if f.args.len() >= 2 {
12411 if let Expression::Function(f) = e {
12412 let mut args = f.args;
12413 let arr = args.remove(0);
12414 let idx = args.remove(0);
12415 (arr, idx)
12416 } else {
12417 unreachable!("outer condition already matched Expression::Function")
12418 }
12419 } else {
12420 return Ok(e);
12421 }
12422 } else {
12423 return Ok(e);
12424 };
12425 match target {
12426 DialectType::PostgreSQL => {
12427 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
12428 let arr_expr = Expression::Paren(Box::new(Paren {
12429 this: arr,
12430 trailing_comments: vec![],
12431 }));
12432 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12433 this: arr_expr,
12434 index: idx,
12435 })))
12436 }
12437 DialectType::BigQuery => {
12438 // BigQuery: convert ARRAY[...] to bare [...] for subscript
12439 let arr_expr = match arr {
12440 Expression::ArrayFunc(af) => {
12441 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12442 expressions: af.expressions,
12443 bracket_notation: true,
12444 use_list_keyword: false,
12445 }))
12446 }
12447 other => other,
12448 };
12449 let safe_ordinal = Expression::Function(Box::new(Function::new(
12450 "SAFE_ORDINAL".to_string(),
12451 vec![idx],
12452 )));
12453 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12454 this: arr_expr,
12455 index: safe_ordinal,
12456 })))
12457 }
12458 _ => Ok(Expression::Function(Box::new(Function::new(
12459 "ELEMENT_AT".to_string(),
12460 vec![arr, idx],
12461 ))))
12462 }
12463 }
12464
12465 Action::CurrentUserParens => {
12466 // CURRENT_USER -> CURRENT_USER() for Snowflake
12467 Ok(Expression::Function(Box::new(Function::new(
12468 "CURRENT_USER".to_string(),
12469 vec![],
12470 ))))
12471 }
12472
Action::ArrayAggToCollectList => {
    // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
    // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
    // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
    match e {
        Expression::AggregateFunction(mut af) => {
            // "Simple" = no DISTINCT / IGNORE NULLS / LIMIT modifiers.
            let is_simple = !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
            // COLLECT_LIST takes a single argument; extras are dropped.
            let args = if af.args.is_empty() { vec![] } else { vec![af.args[0].clone()] };
            af.name = "COLLECT_LIST".to_string();
            af.args = args;
            if is_simple {
                af.order_by = Vec::new();
            }
            Ok(Expression::AggregateFunction(af))
        }
        Expression::ArrayAgg(agg) => {
            // Dedicated ArrayAgg node: rebuild as a named aggregate with the
            // same modifiers, applying the same ORDER BY stripping rule.
            let is_simple = !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
            Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                name: "COLLECT_LIST".to_string(),
                args: vec![agg.this.clone()],
                distinct: agg.distinct,
                filter: agg.filter.clone(),
                order_by: if is_simple { Vec::new() } else { agg.order_by.clone() },
                limit: agg.limit.clone(),
                ignore_nulls: agg.ignore_nulls,
            })))
        }
        _ => Ok(e),
    }
}
12503
Action::ArraySyntaxConvert => {
    // Normalize array-constructor syntax for the target dialect.
    match e {
        // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
        // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
        Expression::ArrayFunc(arr) if !arr.bracket_notation => {
            Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                expressions: arr.expressions,
                bracket_notation: true,
                use_list_keyword: false,
            })))
        }
        // ARRAY(y) function style -> ArrayFunc for target dialect
        // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
        Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
            let bracket = matches!(target, DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks);
            Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                expressions: f.args,
                bracket_notation: bracket,
                use_list_keyword: false,
            })))
        }
        _ => Ok(e),
    }
}
12528
12529 Action::CastToJsonForSpark => {
12530 // CAST(x AS JSON) -> TO_JSON(x) for Spark
12531 if let Expression::Cast(c) = e {
12532 Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![c.this]))))
12533 } else {
12534 Ok(e)
12535 }
12536 }
12537
Action::CastJsonToFromJson => {
    // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
    if let Expression::Cast(c) = e {
        // Unwrap ParseJson to reach the raw JSON string literal; any other
        // operand is handed to FROM_JSON as-is.
        let literal_expr = if let Expression::ParseJson(pj) = c.this {
            pj.this
        } else {
            c.this
        };
        // Convert the target DataType to Spark's type string format
        // (see the sibling data_type_to_spark_string helper).
        let type_str = Self::data_type_to_spark_string(&c.to);
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_JSON".to_string(),
            vec![literal_expr, Expression::Literal(Literal::String(type_str))],
        ))))
    } else {
        Ok(e)
    }
}
12557
Action::ToJsonConvert => {
    // TO_JSON(x) -> target-specific conversion
    if let Expression::ToJson(f) = e {
        let arg = f.this;
        match target {
            DialectType::Presto | DialectType::Trino => {
                // JSON_FORMAT(CAST(x AS JSON))
                let cast_json = Expression::Cast(Box::new(Cast {
                    this: arg,
                    // Custom type keeps the literal JSON spelling in output.
                    to: DataType::Custom { name: "JSON".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
            }
            DialectType::BigQuery => {
                Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), vec![arg]))))
            }
            DialectType::DuckDB => {
                // CAST(TO_JSON(x) AS TEXT) — DuckDB's TO_JSON yields a JSON
                // value, so an explicit TEXT cast produces the string form.
                let to_json = Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_json,
                    to: DataType::Text,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // Other targets: rebuild the ToJson node unchanged (original_name
            // is dropped, so it regenerates as plain TO_JSON).
            _ => Ok(Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None })))
        }
    } else {
        Ok(e)
    }
}
12596
12597 Action::VarianceToClickHouse => {
12598 if let Expression::Variance(f) = e {
12599 Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), vec![f.this]))))
12600 } else { Ok(e) }
12601 }
12602
12603 Action::StddevToClickHouse => {
12604 if let Expression::Stddev(f) = e {
12605 Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), vec![f.this]))))
12606 } else { Ok(e) }
12607 }
12608
12609 Action::ApproxQuantileConvert => {
12610 if let Expression::ApproxQuantile(aq) = e {
12611 let mut args = vec![*aq.this];
12612 if let Some(q) = aq.quantile { args.push(*q); }
12613 Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), args))))
12614 } else { Ok(e) }
12615 }
12616
12617 Action::DollarParamConvert => {
12618 if let Expression::Parameter(p) = e {
12619 Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
12620 name: p.name,
12621 index: p.index,
12622 style: crate::expressions::ParameterStyle::At,
12623 quoted: p.quoted,
12624 expression: p.expression,
12625 })))
12626 } else { Ok(e) }
12627 }
12628
12629 Action::EscapeStringNormalize => {
12630 if let Expression::Literal(Literal::EscapeString(s)) = e {
12631 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
12632 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
12633 s[2..].to_string()
12634 } else {
12635 s
12636 };
12637 let normalized = stripped.replace('\n', "\\n").replace('\r', "\\r").replace('\t', "\\t");
12638 match target {
12639 DialectType::BigQuery => {
12640 // BigQuery: e'...' -> CAST(b'...' AS STRING)
12641 // Use Raw for the b'...' part to avoid double-escaping
12642 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
12643 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
12644 }
12645 _ => {
12646 Ok(Expression::Literal(Literal::EscapeString(normalized)))
12647 }
12648 }
12649 } else { Ok(e) }
12650 }
12651
12652 Action::StraightJoinCase => {
12653 // straight_join: keep lowercase for DuckDB, quote for MySQL
12654 if let Expression::Column(col) = e {
12655 if col.name.name == "STRAIGHT_JOIN" {
12656 let mut new_col = col;
12657 new_col.name.name = "straight_join".to_string();
12658 if matches!(target, DialectType::MySQL) {
12659 // MySQL: needs quoting since it's a reserved keyword
12660 new_col.name.quoted = true;
12661 }
12662 Ok(Expression::Column(new_col))
12663 } else {
12664 Ok(Expression::Column(col))
12665 }
12666 } else { Ok(e) }
12667 }
12668
12669 Action::TablesampleReservoir => {
12670 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
12671 if let Expression::TableSample(mut ts) = e {
12672 if let Some(ref mut sample) = ts.sample {
12673 sample.method = crate::expressions::SampleMethod::Reservoir;
12674 sample.explicit_method = true;
12675 }
12676 Ok(Expression::TableSample(ts))
12677 } else { Ok(e) }
12678 }
12679
12680 Action::TablesampleSnowflakeStrip => {
12681 // Strip method and PERCENT for Snowflake target from non-Snowflake source
12682 match e {
12683 Expression::TableSample(mut ts) => {
12684 if let Some(ref mut sample) = ts.sample {
12685 sample.suppress_method_output = true;
12686 sample.unit_after_size = false;
12687 sample.is_percent = false;
12688 }
12689 Ok(Expression::TableSample(ts))
12690 }
12691 Expression::Table(mut t) => {
12692 if let Some(ref mut sample) = t.table_sample {
12693 sample.suppress_method_output = true;
12694 sample.unit_after_size = false;
12695 sample.is_percent = false;
12696 }
12697 Ok(Expression::Table(t))
12698 }
12699 _ => Ok(e),
12700 }
12701 }
12702
Action::FirstToAnyValue => {
    // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
    if let Expression::First(mut agg) = e {
        // Drop the IGNORE NULLS modifier — presumably redundant for
        // DuckDB's ANY_VALUE; confirm against DuckDB semantics.
        agg.ignore_nulls = None;
        // Override name so the generator emits ANY_VALUE.
        agg.name = Some("ANY_VALUE".to_string());
        Ok(Expression::AnyValue(agg))
    } else { Ok(e) }
}
12711
Action::ArrayIndexConvert => {
    // Subscript index: 1-based to 0-based for BigQuery
    if let Expression::Subscript(mut sub) = e {
        // Only literal integer indices are rewritten; dynamic index
        // expressions are left alone.
        if let Expression::Literal(Literal::Number(ref n)) = sub.index {
            if let Ok(val) = n.parse::<i64>() {
                // NOTE(review): val <= 0 yields a negative index (e.g. 0 -> -1);
                // such inputs are already invalid 1-based indices, but confirm
                // the generator handles the result sensibly.
                sub.index = Expression::Literal(Literal::Number((val - 1).to_string()));
            }
        }
        Ok(Expression::Subscript(sub))
    } else { Ok(e) }
}
12723
12724 Action::AnyValueIgnoreNulls => {
12725 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
12726 if let Expression::AnyValue(mut av) = e {
12727 if av.ignore_nulls.is_none() {
12728 av.ignore_nulls = Some(true);
12729 }
12730 Ok(Expression::AnyValue(av))
12731 } else { Ok(e) }
12732 }
12733
12734 Action::BigQueryNullsOrdering => {
12735 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
12736 if let Expression::WindowFunction(mut wf) = e {
12737 for o in &mut wf.over.order_by {
12738 o.nulls_first = None;
12739 }
12740 Ok(Expression::WindowFunction(wf))
12741 } else if let Expression::Ordered(mut o) = e {
12742 o.nulls_first = None;
12743 Ok(Expression::Ordered(o))
12744 } else { Ok(e) }
12745 }
12746
12747 Action::SnowflakeFloatProtect => {
12748 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
12749 // Snowflake's target transform from converting it to DOUBLE.
12750 // Non-Snowflake sources should keep their FLOAT spelling.
12751 if let Expression::DataType(DataType::Float { .. }) = e {
12752 Ok(Expression::DataType(DataType::Custom { name: "FLOAT".to_string() }))
12753 } else { Ok(e) }
12754 }
12755
12756 Action::MysqlNullsOrdering => {
12757 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
12758 if let Expression::Ordered(mut o) = e {
12759 let nulls_last = o.nulls_first == Some(false);
12760 let desc = o.desc;
12761 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
12762 // If requested ordering matches default, just strip NULLS clause
12763 let matches_default = if desc {
12764 // DESC default is NULLS FIRST, so nulls_first=true matches
12765 o.nulls_first == Some(true)
12766 } else {
12767 // ASC default is NULLS LAST, so nulls_first=false matches
12768 nulls_last
12769 };
12770 if matches_default {
12771 o.nulls_first = None;
12772 Ok(Expression::Ordered(o))
12773 } else {
12774 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
12775 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
12776 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
12777 let null_val = if desc { 1 } else { 0 };
12778 let non_null_val = if desc { 0 } else { 1 };
12779 let _case_expr = Expression::Case(Box::new(Case {
12780 operand: None,
12781 whens: vec![(
12782 Expression::IsNull(Box::new(crate::expressions::IsNull {
12783 this: o.this.clone(),
12784 not: false,
12785 postfix_form: false,
12786 })),
12787 Expression::number(null_val),
12788 )],
12789 else_: Some(Expression::number(non_null_val)),
12790 }));
12791 o.nulls_first = None;
12792 // Return a tuple of [case_expr, ordered_expr]
12793 // We need to return both as part of the ORDER BY
12794 // But since transform_recursive processes individual expressions,
12795 // we can't easily add extra ORDER BY items here.
12796 // Instead, strip the nulls_first
12797 o.nulls_first = None;
12798 Ok(Expression::Ordered(o))
12799 }
12800 } else { Ok(e) }
12801 }
12802
Action::MysqlNullsLastRewrite => {
    // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
    // to simulate NULLS LAST for ASC ordering
    if let Expression::WindowFunction(mut wf) = e {
        // Rebuild the window's ORDER BY list, possibly inserting a synthetic
        // sort key ahead of each ASC item.
        let mut new_order_by = Vec::new();
        for o in wf.over.order_by {
            if !o.desc {
                // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: o.this.clone(),
                            not: false,
                            postfix_form: false,
                        })),
                        Expression::Literal(Literal::Number("1".to_string())),
                    )],
                    else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
                }));
                // The synthetic key sorts NULL rows (1) after non-NULL rows (0).
                new_order_by.push(crate::expressions::Ordered {
                    this: case_expr,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                });
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            } else {
                // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                // No change needed
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            }
        }
        wf.over.order_by = new_order_by;
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}
12846
Action::RespectNullsConvert => {
    // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
    // Only an explicit RESPECT NULLS (ignore_nulls == Some(false)) is
    // removed; an explicit IGNORE NULLS (Some(true)) is left untouched.
    if let Expression::WindowFunction(mut wf) = e {
        match &mut wf.this {
            Expression::FirstValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                    // but that's handled by the generator's NULLS ordering
                }
            }
            Expression::LastValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                }
            }
            // Other window callees carry no RESPECT NULLS modifier here.
            _ => {}
        }
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}
12868
Action::CreateTableStripComment => {
    // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
    if let Expression::CreateTable(mut ct) = e {
        for col in &mut ct.columns {
            // Remove the comment from all three places it can live:
            // the column field, the constraint list, and the ordering list.
            col.comment = None;
            col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Comment(_)));
            // Also remove Comment from constraint_order
            col.constraint_order.retain(|c| !matches!(c, crate::expressions::ConstraintType::Comment));
        }
        // Strip properties (USING, PARTITIONED BY, etc.)
        // NOTE(review): this clears ALL table properties, not just the two
        // named above — confirm nothing DuckDB-relevant is lost.
        ct.properties.clear();
        Ok(Expression::CreateTable(ct))
    } else { Ok(e) }
}
12883
Action::AlterTableToSpRename => {
    // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
    if let Expression::AlterTable(ref at) = e {
        // Only the first action is inspected; other ALTER forms pass through.
        if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
            // Build the old table name using TSQL bracket quoting
            let old_name = if let Some(ref schema) = at.name.schema {
                // If either part was quoted, bracket both parts.
                if at.name.name.quoted || schema.quoted {
                    format!("[{}].[{}]", schema.name, at.name.name.name)
                } else {
                    format!("{}.{}", schema.name, at.name.name.name)
                }
            } else {
                if at.name.name.quoted {
                    format!("[{}]", at.name.name.name)
                } else {
                    at.name.name.name.clone()
                }
            };
            // sp_rename takes only the bare new object name (no schema).
            let new_name = new_tbl.name.name.clone();
            // EXEC sp_rename 'old_name', 'new_name'
            // NOTE(review): names containing single quotes are interpolated
            // unescaped into the EXEC string — confirm upstream normalization
            // rules this out.
            let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
            Ok(Expression::Raw(crate::expressions::Raw { sql }))
        } else { Ok(e) }
    } else { Ok(e) }
}
12909
Action::SnowflakeIntervalFormat => {
    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
    if let Expression::Interval(mut iv) = e {
        // Only rewrite the string-literal-value + simple-unit shape.
        if let (Some(Expression::Literal(Literal::String(ref val))), Some(ref unit_spec)) = (&iv.this, &iv.unit) {
            let unit_str = match unit_spec {
                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                    match unit {
                        crate::expressions::IntervalUnit::Year => "YEAR",
                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                        crate::expressions::IntervalUnit::Month => "MONTH",
                        crate::expressions::IntervalUnit::Week => "WEEK",
                        crate::expressions::IntervalUnit::Day => "DAY",
                        crate::expressions::IntervalUnit::Hour => "HOUR",
                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                        crate::expressions::IntervalUnit::Second => "SECOND",
                        crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
                        crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
                    }
                }
                // Non-simple unit specs (ranges etc.) are left untouched.
                _ => "",
            };
            if !unit_str.is_empty() {
                // Fold the unit into the string literal and drop the
                // separate unit so the generator emits INTERVAL '2 HOUR'.
                let combined = format!("{} {}", val, unit_str);
                iv.this = Some(Expression::Literal(Literal::String(combined)));
                iv.unit = None;
            }
        }
        Ok(Expression::Interval(iv))
    } else { Ok(e) }
}
12940
12941 Action::ArrayConcatBracketConvert => {
12942 // Expression::Array/ArrayFunc -> target-specific
12943 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
12944 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
12945 match e {
12946 Expression::Array(arr) => {
12947 if matches!(target, DialectType::Redshift) {
12948 Ok(Expression::Function(Box::new(Function::new(
12949 "ARRAY".to_string(), arr.expressions,
12950 ))))
12951 } else {
12952 Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12953 expressions: arr.expressions,
12954 bracket_notation: false,
12955 use_list_keyword: false,
12956 })))
12957 }
12958 }
12959 Expression::ArrayFunc(arr) => {
12960 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
12961 if matches!(target, DialectType::Redshift) {
12962 Ok(Expression::Function(Box::new(Function::new(
12963 "ARRAY".to_string(), arr.expressions,
12964 ))))
12965 } else {
12966 Ok(Expression::ArrayFunc(arr))
12967 }
12968 }
12969 _ => Ok(e),
12970 }
12971 }
12972
12973 Action::BitAggFloatCast => {
12974 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
12975 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
12976 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
12977 let int_type = DataType::Int { length: None, integer_spelling: false };
12978 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
12979 if let Expression::Cast(c) = agg_this {
12980 match &c.to {
12981 DataType::Float { .. } | DataType::Double { .. }
12982 | DataType::Custom { .. } => {
12983 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
12984 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
12985 let inner_type = match &c.to {
12986 DataType::Float { precision, scale, .. } => DataType::Float { precision: *precision, scale: *scale, real_spelling: true },
12987 other => other.clone(),
12988 };
12989 let inner_cast = Expression::Cast(Box::new(crate::expressions::Cast {
12990 this: c.this.clone(),
12991 to: inner_type,
12992 trailing_comments: Vec::new(),
12993 double_colon_syntax: false,
12994 format: None,
12995 default: None,
12996 }));
12997 let rounded = Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![inner_cast])));
12998 Expression::Cast(Box::new(crate::expressions::Cast {
12999 this: rounded,
13000 to: int_dt,
13001 trailing_comments: Vec::new(),
13002 double_colon_syntax: false,
13003 format: None,
13004 default: None,
13005 }))
13006 }
13007 DataType::Decimal { .. } => {
13008 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
13009 Expression::Cast(Box::new(crate::expressions::Cast {
13010 this: Expression::Cast(c),
13011 to: int_dt,
13012 trailing_comments: Vec::new(),
13013 double_colon_syntax: false,
13014 format: None,
13015 default: None,
13016 }))
13017 }
13018 _ => Expression::Cast(c),
13019 }
13020 } else {
13021 agg_this
13022 }
13023 };
13024 match e {
13025 Expression::BitwiseOrAgg(mut f) => {
13026 f.this = wrap_agg(f.this, int_type);
13027 Ok(Expression::BitwiseOrAgg(f))
13028 }
13029 Expression::BitwiseAndAgg(mut f) => {
13030 let int_type = DataType::Int { length: None, integer_spelling: false };
13031 f.this = wrap_agg(f.this, int_type);
13032 Ok(Expression::BitwiseAndAgg(f))
13033 }
13034 Expression::BitwiseXorAgg(mut f) => {
13035 let int_type = DataType::Int { length: None, integer_spelling: false };
13036 f.this = wrap_agg(f.this, int_type);
13037 Ok(Expression::BitwiseXorAgg(f))
13038 }
13039 _ => Ok(e),
13040 }
13041 }
13042
13043 Action::BitAggSnowflakeRename => {
13044 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
13045 match e {
13046 Expression::BitwiseOrAgg(f) => {
13047 Ok(Expression::Function(Box::new(Function::new("BITORAGG".to_string(), vec![f.this]))))
13048 }
13049 Expression::BitwiseAndAgg(f) => {
13050 Ok(Expression::Function(Box::new(Function::new("BITANDAGG".to_string(), vec![f.this]))))
13051 }
13052 Expression::BitwiseXorAgg(f) => {
13053 Ok(Expression::Function(Box::new(Function::new("BITXORAGG".to_string(), vec![f.this]))))
13054 }
13055 _ => Ok(e),
13056 }
13057 }
13058
13059 Action::StrftimeCastTimestamp => {
13060 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
13061 if let Expression::Cast(mut c) = e {
13062 if matches!(c.to, DataType::Timestamp { timezone: false, .. }) {
13063 c.to = DataType::Custom { name: "TIMESTAMP_NTZ".to_string() };
13064 }
13065 Ok(Expression::Cast(c))
13066 } else { Ok(e) }
13067 }
13068
13069 Action::DecimalDefaultPrecision => {
13070 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
13071 if let Expression::Cast(mut c) = e {
13072 if matches!(c.to, DataType::Decimal { precision: None, .. }) {
13073 c.to = DataType::Decimal {
13074 precision: Some(18),
13075 scale: Some(3),
13076 };
13077 }
13078 Ok(Expression::Cast(c))
13079 } else { Ok(e) }
13080 }
13081
Action::FilterToIff => {
    // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
    if let Expression::Filter(f) = e {
        let condition = *f.expression;
        let agg = *f.this;
        // Strip WHERE from condition
        let cond = match condition {
            Expression::Where(w) => w.this,
            other => other,
        };
        // Extract the aggregate function and its argument
        // We want AVG(IFF(condition, x, NULL))
        match agg {
            Expression::Function(mut func) => {
                if !func.args.is_empty() {
                    // Wrap the aggregate's first argument in IFF; remaining
                    // arguments are preserved as-is.
                    let orig_arg = func.args[0].clone();
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, orig_arg, Expression::Null(Null)],
                    )));
                    func.args[0] = iff_call;
                    Ok(Expression::Function(func))
                } else {
                    // Zero-arg function: nothing to wrap, keep the Filter node.
                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                        this: Box::new(Expression::Function(func)),
                        expression: Box::new(cond),
                    })))
                }
            }
            Expression::Avg(mut avg) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                )));
                avg.this = iff_call;
                Ok(Expression::Avg(avg))
            }
            Expression::Sum(mut s) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, s.this.clone(), Expression::Null(Null)],
                )));
                s.this = iff_call;
                Ok(Expression::Sum(s))
            }
            Expression::Count(mut c) => {
                // COUNT(*) (this == None) keeps no IFF wrapper; NOTE(review):
                // in that case the filter condition is silently dropped —
                // confirm this is intended.
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
                Ok(Expression::Count(c))
            }
            other => {
                // Fallback: keep as Filter
                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                    this: Box::new(other),
                    expression: Box::new(cond),
                })))
            }
        }
    } else { Ok(e) }
}
13147
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Folds a FILTER clause into the aggregate's argument for targets (e.g. Snowflake)
                // that do not support FILTER on aggregates: rows failing the condition contribute
                // NULL, which the aggregate functions ignore.
                //
                // Helper macro to handle the common AggFunc case: detach `filter` (leaving None)
                // and rewrap `this` as IFF(filter, this, NULL). No-op when no filter is present.
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => handle_agg_filter_to_iff!(ApproxDistinct, agg),
                    Expression::Count(mut c) => {
                        // COUNT's argument is optional (COUNT(*) has none), so it cannot use the
                        // macro; only wrap when an argument exists.
                        // NOTE(review): when `c.this` is None (COUNT(*)) the filter is still
                        // take()n and thus silently dropped — confirm that is intended.
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![filter_cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    // Non-aggregate expressions pass through unchanged.
                    other => Ok(other),
                }
            }
13199
13200 Action::JsonToGetPath => {
13201 // JSON_EXTRACT(JSON('x'), '$.key') -> GET_PATH(PARSE_JSON('x'), 'key')
13202 if let Expression::JsonExtract(je) = e {
13203 // Convert JSON() to PARSE_JSON()
13204 let this = match &je.this {
13205 Expression::Function(f) if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 => {
13206 Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), f.args.clone())))
13207 }
13208 _ => je.this.clone(),
13209 };
13210 // Convert path: extract key from JSONPath or strip $. prefix from string
13211 let path = match &je.path {
13212 Expression::JSONPath(jp) => {
13213 // Extract the key from JSONPath: $root.key -> 'key'
13214 let mut key_parts = Vec::new();
13215 for expr in &jp.expressions {
13216 match expr {
13217 Expression::JSONPathRoot(_) => {} // skip root
13218 Expression::JSONPathKey(k) => {
13219 if let Expression::Literal(Literal::String(s)) = &*k.this {
13220 key_parts.push(s.clone());
13221 }
13222 }
13223 _ => {}
13224 }
13225 }
13226 if !key_parts.is_empty() {
13227 Expression::Literal(Literal::String(key_parts.join(".")))
13228 } else {
13229 je.path.clone()
13230 }
13231 }
13232 Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
13233 Expression::Literal(Literal::String(s[2..].to_string()))
13234 }
13235 Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
13236 Expression::Literal(Literal::String(s[1..].to_string()))
13237 }
13238 _ => je.path.clone(),
13239 };
13240 Ok(Expression::Function(Box::new(Function::new(
13241 "GET_PATH".to_string(),
13242 vec![this, path],
13243 ))))
13244 } else { Ok(e) }
13245 }
13246
13247 Action::StructToRow => {
13248 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
13249 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
13250
13251 // Extract key-value pairs from either Struct or MapFunc
13252 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
13253 Expression::Struct(s) => {
13254 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13255 if let Some(name) = opt_name {
13256 (name.clone(), field_expr.clone())
13257 } else if let Expression::NamedArgument(na) = field_expr {
13258 (na.name.name.clone(), na.value.clone())
13259 } else {
13260 (String::new(), field_expr.clone())
13261 }
13262 }).collect())
13263 }
13264 Expression::MapFunc(m) if m.curly_brace_syntax => {
13265 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13266 let key_name = match key {
13267 Expression::Literal(Literal::String(s)) => s.clone(),
13268 Expression::Identifier(id) => id.name.clone(),
13269 _ => String::new(),
13270 };
13271 (key_name, value.clone())
13272 }).collect())
13273 }
13274 _ => None,
13275 };
13276
13277 if let Some(pairs) = kv_pairs {
13278 let mut named_args = Vec::new();
13279 for (key_name, value) in pairs {
13280 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
13281 named_args.push(Expression::Alias(Box::new(
13282 crate::expressions::Alias::new(value, Identifier::new(key_name))
13283 )));
13284 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13285 named_args.push(value);
13286 } else {
13287 named_args.push(value);
13288 }
13289 }
13290
13291 if matches!(target, DialectType::BigQuery) {
13292 Ok(Expression::Function(Box::new(Function::new(
13293 "STRUCT".to_string(),
13294 named_args,
13295 ))))
13296 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13297 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
13298 let row_func = Expression::Function(Box::new(Function::new(
13299 "ROW".to_string(),
13300 named_args,
13301 )));
13302
13303 // Try to infer types for each pair
13304 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
13305 Expression::Struct(s) => {
13306 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13307 if let Some(name) = opt_name {
13308 (name.clone(), field_expr.clone())
13309 } else if let Expression::NamedArgument(na) = field_expr {
13310 (na.name.name.clone(), na.value.clone())
13311 } else {
13312 (String::new(), field_expr.clone())
13313 }
13314 }).collect())
13315 }
13316 Expression::MapFunc(m) if m.curly_brace_syntax => {
13317 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13318 let key_name = match key {
13319 Expression::Literal(Literal::String(s)) => s.clone(),
13320 Expression::Identifier(id) => id.name.clone(),
13321 _ => String::new(),
13322 };
13323 (key_name, value.clone())
13324 }).collect())
13325 }
13326 _ => None,
13327 };
13328
13329 if let Some(pairs) = kv_pairs_again {
13330 // Infer types for all values
13331 let mut all_inferred = true;
13332 let mut fields = Vec::new();
13333 for (name, value) in &pairs {
13334 let inferred_type = match value {
13335 Expression::Literal(Literal::Number(n)) => {
13336 if n.contains('.') {
13337 Some(DataType::Double { precision: None, scale: None })
13338 } else {
13339 Some(DataType::Int { length: None, integer_spelling: true })
13340 }
13341 }
13342 Expression::Literal(Literal::String(_)) => {
13343 Some(DataType::VarChar { length: None, parenthesized_length: false })
13344 }
13345 Expression::Boolean(_) => Some(DataType::Boolean),
13346 _ => None,
13347 };
13348 if let Some(dt) = inferred_type {
13349 fields.push(crate::expressions::StructField::new(name.clone(), dt));
13350 } else {
13351 all_inferred = false;
13352 break;
13353 }
13354 }
13355
13356 if all_inferred && !fields.is_empty() {
13357 let row_type = DataType::Struct { fields, nested: true };
13358 Ok(Expression::Cast(Box::new(Cast {
13359 this: row_func,
13360 to: row_type,
13361 trailing_comments: Vec::new(),
13362 double_colon_syntax: false,
13363 format: None,
13364 default: None,
13365 })))
13366 } else {
13367 Ok(row_func)
13368 }
13369 } else {
13370 Ok(row_func)
13371 }
13372 } else {
13373 Ok(Expression::Function(Box::new(Function::new(
13374 "ROW".to_string(),
13375 named_args,
13376 ))))
13377 }
13378 } else { Ok(e) }
13379 }
13380
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args; unaliased args get an
                    // empty-string name.
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(crate::expressions::MapConstructor {
                                keys,
                                values,
                                curly_brace_syntax: true,
                                with_map_keyword: false,
                            })))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> = pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(), row_args,
                            )));

                            // Infer types from literal values so the CAST can carry field
                            // names; any non-literal field aborts inference and the bare
                            // ROW(...) is emitted instead.
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        // Decimal point distinguishes DOUBLE from INTEGER.
                                        if n.contains('.') {
                                            Some(DataType::Double { precision: None, scale: None })
                                        } else {
                                            Some(DataType::Int { length: None, integer_spelling: true })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar { length: None, parenthesized_length: false })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(name.clone(), dt));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct { fields, nested: true };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the original STRUCT(...) call untouched.
                        _ => Ok(Expression::Function(f)),
                    }
                } else { Ok(e) }
            }
13465
13466 Action::ApproxCountDistinctToApproxDistinct => {
13467 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
13468 if let Expression::ApproxCountDistinct(f) = e {
13469 Ok(Expression::ApproxDistinct(f))
13470 } else {
13471 Ok(e)
13472 }
13473 }
13474
13475 Action::CollectListToArrayAgg => {
13476 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
13477 if let Expression::AggregateFunction(f) = e {
13478 let filter_expr = if !f.args.is_empty() {
13479 let arg = f.args[0].clone();
13480 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
13481 this: arg,
13482 not: true,
13483 postfix_form: false,
13484 })))
13485 } else {
13486 None
13487 };
13488 let agg = crate::expressions::AggFunc {
13489 this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
13490 distinct: f.distinct,
13491 order_by: f.order_by.clone(),
13492 filter: filter_expr,
13493 ignore_nulls: None,
13494 name: None,
13495 having_max: None,
13496 limit: None,
13497 };
13498 Ok(Expression::ArrayAgg(Box::new(agg)))
13499 } else {
13500 Ok(e)
13501 }
13502 }
13503
13504 Action::CollectSetConvert => {
13505 // COLLECT_SET(x) -> target-specific
13506 if let Expression::AggregateFunction(f) = e {
13507 match target {
13508 DialectType::Presto => {
13509 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13510 name: "SET_AGG".to_string(),
13511 args: f.args,
13512 distinct: false,
13513 order_by: f.order_by,
13514 filter: f.filter,
13515 limit: f.limit,
13516 ignore_nulls: f.ignore_nulls,
13517 })))
13518 }
13519 DialectType::Snowflake => {
13520 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13521 name: "ARRAY_UNIQUE_AGG".to_string(),
13522 args: f.args,
13523 distinct: false,
13524 order_by: f.order_by,
13525 filter: f.filter,
13526 limit: f.limit,
13527 ignore_nulls: f.ignore_nulls,
13528 })))
13529 }
13530 DialectType::Trino | DialectType::DuckDB => {
13531 let agg = crate::expressions::AggFunc {
13532 this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
13533 distinct: true,
13534 order_by: Vec::new(),
13535 filter: None,
13536 ignore_nulls: None,
13537 name: None,
13538 having_max: None,
13539 limit: None,
13540 };
13541 Ok(Expression::ArrayAgg(Box::new(agg)))
13542 }
13543 _ => Ok(Expression::AggregateFunction(f))
13544 }
13545 } else {
13546 Ok(e)
13547 }
13548 }
13549
13550 Action::PercentileConvert => {
13551 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
13552 if let Expression::AggregateFunction(f) = e {
13553 let name = match target {
13554 DialectType::DuckDB => "QUANTILE",
13555 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
13556 _ => "PERCENTILE",
13557 };
13558 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13559 name: name.to_string(),
13560 args: f.args,
13561 distinct: f.distinct,
13562 order_by: f.order_by,
13563 filter: f.filter,
13564 limit: f.limit,
13565 ignore_nulls: f.ignore_nulls,
13566 })))
13567 } else {
13568 Ok(e)
13569 }
13570 }
13571
13572 Action::CorrIsnanWrap => {
13573 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
13574 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
13575 let corr_clone = e.clone();
13576 let isnan = Expression::Function(Box::new(Function::new(
13577 "ISNAN".to_string(), vec![corr_clone.clone()],
13578 )));
13579 let case_expr = Expression::Case(Box::new(Case {
13580 operand: None,
13581 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
13582 else_: Some(corr_clone),
13583 }));
13584 Ok(case_expr)
13585 }
13586
            Action::TruncToDateTrunc => {
                // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
                // Note the argument order flips: TRUNC takes (value, unit) while
                // DATE_TRUNC takes (unit, value).
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let timestamp = f.args[0].clone();
                        let unit_expr = f.args[1].clone();

                        if matches!(target, DialectType::ClickHouse) {
                            // For ClickHouse, produce Expression::DateTrunc which the generator
                            // outputs as DATE_TRUNC(...) without going through the ClickHouse
                            // target transform that would convert it to dateTrunc
                            let unit_str = Self::get_unit_str_static(&unit_expr);
                            // Map the textual unit onto the typed DateTimeField; unrecognized
                            // units are carried through verbatim as Custom.
                            let dt_field = match unit_str.as_str() {
                                "YEAR" => DateTimeField::Year,
                                "MONTH" => DateTimeField::Month,
                                "DAY" => DateTimeField::Day,
                                "HOUR" => DateTimeField::Hour,
                                "MINUTE" => DateTimeField::Minute,
                                "SECOND" => DateTimeField::Second,
                                "WEEK" => DateTimeField::Week,
                                "QUARTER" => DateTimeField::Quarter,
                                _ => DateTimeField::Custom(unit_str),
                            };
                            Ok(Expression::DateTrunc(Box::new(crate::expressions::DateTruncFunc {
                                this: timestamp,
                                unit: dt_field,
                            })))
                        } else {
                            // Other targets: emit a plain DATE_TRUNC(unit, value) call.
                            let new_args = vec![unit_expr, timestamp];
                            Ok(Expression::Function(Box::new(Function::new("DATE_TRUNC".to_string(), new_args))))
                        }
                    } else {
                        // One-argument TRUNC (numeric truncation) is left untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
13625
13626 Action::ArrayContainsConvert => {
13627 if let Expression::ArrayContains(f) = e {
13628 match target {
13629 DialectType::Presto | DialectType::Trino => {
13630 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
13631 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), vec![f.this, f.expression]))))
13632 }
13633 DialectType::Snowflake => {
13634 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
13635 let cast_val = Expression::Cast(Box::new(crate::expressions::Cast {
13636 this: f.expression,
13637 to: crate::expressions::DataType::Custom { name: "VARIANT".to_string() },
13638 trailing_comments: Vec::new(),
13639 double_colon_syntax: false,
13640 format: None,
13641 default: None,
13642 }));
13643 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), vec![cast_val, f.this]))))
13644 }
13645 _ => Ok(Expression::ArrayContains(f))
13646 }
13647 } else {
13648 Ok(e)
13649 }
13650 }
13651
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                // The targets only support the two-argument STRPOS, so the starting offset is
                // emulated by searching a SUBSTRING and re-adding the offset (minus one) to a
                // non-zero hit, while a miss stays 0.
                if let Expression::StrPosition(sp) = e {
                    let crate::expressions::StrPosition { this, substr, position, occurrence } = *sp;
                    let string = *this;
                    // NOTE(review): a missing substring becomes a NULL argument rather than an
                    // error — confirm downstream generators handle STRPOS(x, NULL).
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    // Missing position defaults to 1 (search from the start; SQL is 1-based).
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(), vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1 — translate the offset inside the substring back
                    // to an index into the full string.
                    let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                            strpos_call.clone(),
                            pos.clone(),
                        ))),
                        Expression::number(1),
                    )));
                    // STRPOS(...) = 0 — the "not found" guard shared by both output shapes.
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![
                                    (is_zero, Expression::number(0)),
                                ],
                                else_: Some(pos_adjusted),
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition (the fields were destructured above, so
                            // the node has to be rebuilt with the normalized defaults).
                            Ok(Expression::StrPosition(Box::new(crate::expressions::StrPosition {
                                this: Box::new(string),
                                substr: Some(Box::new(substr_expr)),
                                position: Some(Box::new(pos)),
                                occurrence,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
13723
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) rewrites per target.
                if let Expression::MonthsBetween(mb) = e {
                    let crate::expressions::BinaryFunc { this: end_date, expression: start_date, .. } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // DuckDB emulation:
                            //   DATE_DIFF('MONTH', start, end)
                            //   + CASE WHEN both dates are the last day of their month THEN 0
                            //          ELSE (DAY(end) - DAY(start)) / 31.0 END
                            // i.e. whole months plus a day-based fraction, with the
                            // last-day-to-last-day case counted as an exact number of months.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
                            )));
                            let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
                            let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
                            let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
                            let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
                            let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
                            let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
                            // DAY(end) = DAY(LAST_DAY(end)) — end falls on its month's last day.
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
                            // DAY(start) = DAY(LAST_DAY(start)) — same for start.
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
                            let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            // Parenthesize so the division binds to the whole difference.
                            let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                                this: day_diff,
                                trailing_comments: Vec::new(),
                            }));
                            // (DAY(end) - DAY(start)) / 31.0 — the fractional-month component
                            // uses a fixed 31-day month, matching the source semantics.
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Literal::Number("31.0".to_string())),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                            }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — whole months only; the unit is a
                            // bare identifier in these dialects, not a string.
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(), vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — unit passed as a string literal.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        _ => {
                            // Default: rebuild the MonthsBetween node (it was destructured above).
                            Ok(Expression::MonthsBetween(Box::new(crate::expressions::BinaryFunc {
                                this: end_date, expression: start_date, original_name: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
13780
            Action::AddMonthsConvert => {
                // ADD_MONTHS(date, n) rewrites per target; the DuckDB-from-Snowflake path also
                // reproduces Snowflake's end-of-month snapping behavior.
                if let Expression::AddMonths(am) = e {
                    let date = am.this;
                    let val = am.expression;
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // DATEADD(MONTH, n, CAST(date AS DATETIME2)).
                            let cast_date = Self::ensure_cast_datetime2(date);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(), vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val, cast_date,
                                ],
                            ))))
                        }
                        DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                            // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            // Optionally wrapped in CAST(... AS type) if the input had a specific type

                            // Determine the cast type from the date expression
                            let (cast_date, return_type) = match &date {
                                Expression::Literal(Literal::String(_)) => {
                                    // String literal: CAST(str AS TIMESTAMP), no outer CAST
                                    (Expression::Cast(Box::new(Cast {
                                        this: date.clone(), to: DataType::Timestamp { precision: None, timezone: false },
                                        trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                    })), None)
                                }
                                Expression::Cast(c) => {
                                    // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                                    (date.clone(), Some(c.to.clone()))
                                }
                                _ => {
                                    // Expression or NULL::TYPE - keep as-is, check for cast type
                                    // NOTE(review): this inner Cast check looks unreachable —
                                    // Expression::Cast is already captured by the arm above, so
                                    // this `_` arm can never see a Cast. Candidate for removal.
                                    if let Expression::Cast(c) = &date {
                                        (date.clone(), Some(c.to.clone()))
                                    } else {
                                        (date.clone(), None)
                                    }
                                }
                            };

                            // Build the interval expression
                            // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                            // For integer values, use INTERVAL val MONTH
                            let is_non_integer_val = match &val {
                                Expression::Literal(Literal::Number(n)) => n.contains('.'),
                                Expression::Cast(_) => true, // e.g. 3.2::DECIMAL(10,2)
                                Expression::Neg(n) => {
                                    // Negated literal: look through the negation for a decimal point.
                                    if let Expression::Literal(Literal::Number(s)) = &n.this {
                                        s.contains('.')
                                    } else { false }
                                }
                                _ => false,
                            };

                            let add_interval = if is_non_integer_val {
                                // TO_MONTHS(CAST(ROUND(val) AS INT)) — Snowflake rounds a
                                // fractional month count to the nearest whole month.
                                let round_val = Expression::Function(Box::new(Function::new(
                                    "ROUND".to_string(), vec![val.clone()],
                                )));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: round_val, to: DataType::Int { length: None, integer_spelling: false },
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                }));
                                Expression::Function(Box::new(Function::new(
                                    "TO_MONTHS".to_string(), vec![cast_int],
                                )))
                            } else {
                                // INTERVAL val MONTH
                                // For negative numbers, wrap in parens
                                let interval_val = match &val {
                                    Expression::Literal(Literal::Number(n)) if n.starts_with('-') => {
                                        Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
                                    }
                                    Expression::Neg(_) => {
                                        Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
                                    }
                                    Expression::Null(_) => {
                                        Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
                                    }
                                    _ => val.clone(),
                                };
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(interval_val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }))
                            };

                            // Build: date + interval
                            let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                                cast_date.clone(), add_interval.clone(),
                            )));

                            // Build LAST_DAY(date)
                            let last_day_date = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(), vec![cast_date.clone()],
                            )));

                            // Build LAST_DAY(date + interval)
                            let last_day_date_plus = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(), vec![date_plus_interval.clone()],
                            )));

                            // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            // (Snowflake snaps a last-of-month input to the last day of the
                            // result month.)
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Eq(Box::new(BinaryOp::new(
                                        last_day_date, cast_date.clone(),
                                    ))),
                                    last_day_date_plus,
                                )],
                                else_: Some(date_plus_interval),
                            }));

                            // Wrap in CAST(... AS type) if needed
                            if let Some(dt) = return_type {
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: case_expr, to: dt,
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                })))
                            } else {
                                Ok(case_expr)
                            }
                        }
                        DialectType::DuckDB => {
                            // Non-Snowflake source: simple date + INTERVAL
                            let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
                                Expression::Cast(Box::new(Cast {
                                    this: date, to: DataType::Timestamp { precision: None, timezone: false },
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                }))
                            } else { date };
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Month,
                                    use_plural: false,
                                }),
                            }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(cast_date, interval))))
                        }
                        DialectType::Snowflake => {
                            // Keep ADD_MONTHS when source is also Snowflake
                            if matches!(source, DialectType::Snowflake) {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(), vec![date, val],
                                ))))
                            } else {
                                // Other sources: DATEADD(MONTH, n, date).
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(), vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val, date,
                                    ],
                                ))))
                            }
                        }
                        DialectType::Redshift => {
                            // DATEADD(MONTH, n, date).
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(), vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val, date,
                                ],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_ADD('MONTH', n, date); string-literal dates get an explicit
                            // CAST to TIMESTAMP first.
                            let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
                                Expression::Cast(Box::new(Cast {
                                    this: date, to: DataType::Timestamp { precision: None, timezone: false },
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                }))
                            } else { date };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![
                                    Expression::string("MONTH"),
                                    val, cast_date,
                                ],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // DATE_ADD(date, INTERVAL n MONTH); string-literal dates get an
                            // explicit CAST to DATETIME first.
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Month,
                                    use_plural: false,
                                }),
                            }));
                            let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
                                Expression::Cast(Box::new(Cast {
                                    this: date, to: DataType::Custom { name: "DATETIME".to_string() },
                                    trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
                                }))
                            } else { date };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![cast_date, interval],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            // These dialects support ADD_MONTHS natively.
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![date, val],
                            ))))
                        }
                        _ => {
                            // Default: keep as AddMonths expression
                            Ok(Expression::AddMonths(Box::new(crate::expressions::BinaryFunc {
                                this: date, expression: val, original_name: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
13997
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                // Presto/Trino: APPROX_PERCENTILE(col, p)
                // Spark/Databricks: PERCENTILE_APPROX(col, p)
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column; a missing percentile
                    // argument defaults to the median (0.5).
                    // NOTE(review): `_is_disc` distinguishes PERCENTILE_DISC but is never
                    // used afterwards — DISC is converted the same as CONT. Confirm intended.
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => {
                            (pc.percentile.clone(), false)
                        }
                        // Unrecognized inner expression: leave the WITHIN GROUP untouched.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // The aggregated column comes from the first ORDER BY key; a degenerate
                    // empty ORDER BY falls back to the literal 1.
                    let col = wg.order_by.first().map(|o| o.this.clone())
                        .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(), vec![col, percentile],
                    ))))
                } else {
                    Ok(e)
                }
            }
14034
14035 Action::CurrentUserSparkParens => {
14036 // CURRENT_USER -> CURRENT_USER() for Spark
14037 if let Expression::CurrentUser(_) = e {
14038 Ok(Expression::Function(Box::new(Function::new("CURRENT_USER".to_string(), vec![]))))
14039 } else {
14040 Ok(e)
14041 }
14042 }
14043
14044 Action::SparkDateFuncCast => {
14045 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
14046 let cast_arg = |arg: Expression| -> Expression {
14047 match target {
14048 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14049 Self::double_cast_timestamp_date(arg)
14050 }
14051 _ => {
14052 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
14053 Self::ensure_cast_date(arg)
14054 }
14055 }
14056 };
14057 match e {
14058 Expression::Month(f) => {
14059 Ok(Expression::Month(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14060 }
14061 Expression::Year(f) => {
14062 Ok(Expression::Year(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14063 }
14064 Expression::Day(f) => {
14065 Ok(Expression::Day(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14066 }
14067 other => Ok(other),
14068 }
14069 }
14070
14071 Action::MapFromArraysConvert => {
14072 // Expression::MapFromArrays -> target-specific
14073 if let Expression::MapFromArrays(mfa) = e {
14074 let keys = mfa.this;
14075 let values = mfa.expression;
14076 match target {
14077 DialectType::Snowflake => {
14078 Ok(Expression::Function(Box::new(Function::new(
14079 "OBJECT_CONSTRUCT".to_string(),
14080 vec![keys, values],
14081 ))))
14082 }
14083 _ => {
14084 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
14085 Ok(Expression::Function(Box::new(Function::new(
14086 "MAP".to_string(),
14087 vec![keys, values],
14088 ))))
14089 }
14090 }
14091 } else {
14092 Ok(e)
14093 }
14094 }
14095
14096 Action::AnyToExists => {
14097 if let Expression::Any(q) = e {
14098 if let Some(op) = q.op.clone() {
14099 let lambda_param = crate::expressions::Identifier::new("x");
14100 let rhs = Expression::Identifier(lambda_param.clone());
14101 let body = match op {
14102 crate::expressions::QuantifiedOp::Eq => Expression::Eq(Box::new(BinaryOp::new(q.this, rhs))),
14103 crate::expressions::QuantifiedOp::Neq => Expression::Neq(Box::new(BinaryOp::new(q.this, rhs))),
14104 crate::expressions::QuantifiedOp::Lt => Expression::Lt(Box::new(BinaryOp::new(q.this, rhs))),
14105 crate::expressions::QuantifiedOp::Lte => Expression::Lte(Box::new(BinaryOp::new(q.this, rhs))),
14106 crate::expressions::QuantifiedOp::Gt => Expression::Gt(Box::new(BinaryOp::new(q.this, rhs))),
14107 crate::expressions::QuantifiedOp::Gte => Expression::Gte(Box::new(BinaryOp::new(q.this, rhs))),
14108 };
14109 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14110 parameters: vec![lambda_param],
14111 body,
14112 colon: false,
14113 parameter_types: Vec::new(),
14114 }));
14115 Ok(Expression::Function(Box::new(Function::new(
14116 "EXISTS".to_string(),
14117 vec![q.subquery, lambda],
14118 ))))
14119 } else {
14120 Ok(Expression::Any(q))
14121 }
14122 } else {
14123 Ok(e)
14124 }
14125 }
14126
14127 Action::GenerateSeriesConvert => {
14128 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
14129 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
14130 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
14131 if let Expression::Function(f) = e {
14132 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
14133 let start = f.args[0].clone();
14134 let end = f.args[1].clone();
14135 let step = f.args.get(2).cloned();
14136
14137 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
14138 let step = step.map(|s| Self::normalize_interval_string(s, target));
14139
14140 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
14141 let maybe_cast_timestamp = |arg: Expression| -> Expression {
14142 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
14143 | DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
14144 match &arg {
14145 Expression::CurrentTimestamp(_) => {
14146 Expression::Cast(Box::new(Cast {
14147 this: arg,
14148 to: DataType::Timestamp { precision: None, timezone: false },
14149 trailing_comments: Vec::new(),
14150 double_colon_syntax: false,
14151 format: None,
14152 default: None,
14153 }))
14154 }
14155 _ => arg,
14156 }
14157 } else {
14158 arg
14159 }
14160 };
14161
14162 let start = maybe_cast_timestamp(start);
14163 let end = maybe_cast_timestamp(end);
14164
14165 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
14166 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
14167 let mut gs_args = vec![start, end];
14168 if let Some(step) = step {
14169 gs_args.push(step);
14170 }
14171 return Ok(Expression::Function(Box::new(Function::new(
14172 "GENERATE_SERIES".to_string(), gs_args,
14173 ))));
14174 }
14175
14176 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
14177 if matches!(target, DialectType::DuckDB) {
14178 let mut gs_args = vec![start, end];
14179 if let Some(step) = step {
14180 gs_args.push(step);
14181 }
14182 let gs = Expression::Function(Box::new(Function::new(
14183 "GENERATE_SERIES".to_string(), gs_args,
14184 )));
14185 return Ok(Expression::Function(Box::new(Function::new(
14186 "UNNEST".to_string(), vec![gs],
14187 ))));
14188 }
14189
14190 let mut seq_args = vec![start, end];
14191 if let Some(step) = step {
14192 seq_args.push(step);
14193 }
14194
14195 let seq = Expression::Function(Box::new(Function::new(
14196 "SEQUENCE".to_string(), seq_args,
14197 )));
14198
14199 match target {
14200 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14201 // Wrap in UNNEST
14202 Ok(Expression::Function(Box::new(Function::new(
14203 "UNNEST".to_string(), vec![seq],
14204 ))))
14205 }
14206 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
14207 // Wrap in EXPLODE
14208 Ok(Expression::Function(Box::new(Function::new(
14209 "EXPLODE".to_string(), vec![seq],
14210 ))))
14211 }
14212 _ => {
14213 // Just SEQUENCE for others
14214 Ok(seq)
14215 }
14216 }
14217 } else {
14218 Ok(Expression::Function(f))
14219 }
14220 } else {
14221 Ok(e)
14222 }
14223 }
14224
14225 Action::ConcatCoalesceWrap => {
14226 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
14227 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
14228 if let Expression::Function(f) = e {
14229 if f.name.eq_ignore_ascii_case("CONCAT") {
14230 let new_args: Vec<Expression> = f.args.into_iter().map(|arg| {
14231 let cast_arg = if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
14232 Expression::Cast(Box::new(Cast {
14233 this: arg, to: DataType::VarChar { length: None, parenthesized_length: false },
14234 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14235 }))
14236 } else {
14237 arg
14238 };
14239 Expression::Function(Box::new(Function::new(
14240 "COALESCE".to_string(), vec![cast_arg, Expression::string("")],
14241 )))
14242 }).collect();
14243 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args))))
14244 } else {
14245 Ok(Expression::Function(f))
14246 }
14247 } else {
14248 Ok(e)
14249 }
14250 }
14251
14252 Action::PipeConcatToConcat => {
14253 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
14254 if let Expression::Concat(op) = e {
14255 let cast_left = Expression::Cast(Box::new(Cast {
14256 this: op.left, to: DataType::VarChar { length: None, parenthesized_length: false },
14257 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14258 }));
14259 let cast_right = Expression::Cast(Box::new(Cast {
14260 this: op.right, to: DataType::VarChar { length: None, parenthesized_length: false },
14261 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14262 }));
14263 Ok(Expression::Function(Box::new(Function::new(
14264 "CONCAT".to_string(), vec![cast_left, cast_right],
14265 ))))
14266 } else {
14267 Ok(e)
14268 }
14269 }
14270
14271 Action::DivFuncConvert => {
14272 // DIV(a, b) -> target-specific integer division
14273 if let Expression::Function(f) = e {
14274 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
14275 let a = f.args[0].clone();
14276 let b = f.args[1].clone();
14277 match target {
14278 DialectType::DuckDB => {
14279 // DIV(a, b) -> CAST(a // b AS DECIMAL)
14280 let int_div = Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
14281 this: a, expression: b, original_name: None,
14282 }));
14283 Ok(Expression::Cast(Box::new(Cast {
14284 this: int_div, to: DataType::Decimal { precision: None, scale: None },
14285 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14286 })))
14287 }
14288 DialectType::BigQuery => {
14289 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
14290 let div_func = Expression::Function(Box::new(Function::new(
14291 "DIV".to_string(), vec![a, b],
14292 )));
14293 Ok(Expression::Cast(Box::new(Cast {
14294 this: div_func, to: DataType::Custom { name: "NUMERIC".to_string() },
14295 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14296 })))
14297 }
14298 DialectType::SQLite => {
14299 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
14300 let cast_a = Expression::Cast(Box::new(Cast {
14301 this: a, to: DataType::Custom { name: "REAL".to_string() },
14302 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14303 }));
14304 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
14305 let cast_int = Expression::Cast(Box::new(Cast {
14306 this: div, to: DataType::Int { length: None, integer_spelling: true },
14307 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14308 }));
14309 Ok(Expression::Cast(Box::new(Cast {
14310 this: cast_int, to: DataType::Custom { name: "REAL".to_string() },
14311 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14312 })))
14313 }
14314 _ => Ok(Expression::Function(f)),
14315 }
14316 } else {
14317 Ok(Expression::Function(f))
14318 }
14319 } else {
14320 Ok(e)
14321 }
14322 }
14323
14324 Action::JsonObjectAggConvert => {
14325 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
14326 match e {
14327 Expression::Function(f) => {
14328 Ok(Expression::Function(Box::new(Function::new(
14329 "JSON_GROUP_OBJECT".to_string(), f.args,
14330 ))))
14331 }
14332 Expression::AggregateFunction(af) => {
14333 // AggregateFunction stores all args in the `args` vec
14334 Ok(Expression::Function(Box::new(Function::new(
14335 "JSON_GROUP_OBJECT".to_string(), af.args,
14336 ))))
14337 }
14338 other => Ok(other),
14339 }
14340 }
14341
14342 Action::JsonbExistsConvert => {
14343 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
14344 if let Expression::Function(f) = e {
14345 if f.args.len() == 2 {
14346 let json_expr = f.args[0].clone();
14347 let key = match &f.args[1] {
14348 Expression::Literal(crate::expressions::Literal::String(s)) => format!("$.{}", s),
14349 _ => return Ok(Expression::Function(f)),
14350 };
14351 Ok(Expression::Function(Box::new(Function::new(
14352 "JSON_EXISTS".to_string(), vec![json_expr, Expression::string(&key)],
14353 ))))
14354 } else {
14355 Ok(Expression::Function(f))
14356 }
14357 } else {
14358 Ok(e)
14359 }
14360 }
14361
14362 Action::DateBinConvert => {
14363 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
14364 if let Expression::Function(f) = e {
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "TIME_BUCKET".to_string(), f.args,
14367 ))))
14368 } else {
14369 Ok(e)
14370 }
14371 }
14372
14373 Action::MysqlCastCharToText => {
14374 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
14375 if let Expression::Cast(mut c) = e {
14376 c.to = DataType::Text;
14377 Ok(Expression::Cast(c))
14378 } else {
14379 Ok(e)
14380 }
14381 }
14382
14383 Action::SparkCastVarcharToString => {
14384 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
14385 match e {
14386 Expression::Cast(mut c) => {
14387 c.to = Self::normalize_varchar_to_string(c.to);
14388 Ok(Expression::Cast(c))
14389 }
14390 Expression::TryCast(mut c) => {
14391 c.to = Self::normalize_varchar_to_string(c.to);
14392 Ok(Expression::TryCast(c))
14393 }
14394 _ => Ok(e),
14395 }
14396 }
14397
14398 Action::MinMaxToLeastGreatest => {
14399 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
14400 if let Expression::Function(f) = e {
14401 let name = f.name.to_uppercase();
14402 let new_name = match name.as_str() {
14403 "MIN" => "LEAST",
14404 "MAX" => "GREATEST",
14405 _ => return Ok(Expression::Function(f)),
14406 };
14407 Ok(Expression::Function(Box::new(Function::new(
14408 new_name.to_string(),
14409 f.args,
14410 ))))
14411 } else {
14412 Ok(e)
14413 }
14414 }
14415
14416 Action::ClickHouseUniqToApproxCountDistinct => {
14417 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
14418 if let Expression::Function(f) = e {
14419 Ok(Expression::Function(Box::new(Function::new(
14420 "APPROX_COUNT_DISTINCT".to_string(),
14421 f.args,
14422 ))))
14423 } else {
14424 Ok(e)
14425 }
14426 }
14427
14428 Action::ClickHouseAnyToAnyValue => {
14429 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
14430 if let Expression::Function(f) = e {
14431 Ok(Expression::Function(Box::new(Function::new(
14432 "ANY_VALUE".to_string(),
14433 f.args,
14434 ))))
14435 } else {
14436 Ok(e)
14437 }
14438 }
14439
14440 Action::OracleVarchar2ToVarchar => {
14441 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
14442 if let Expression::DataType(DataType::Custom { ref name }) = e {
14443 let upper = name.to_uppercase();
14444 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
14445 let inner = if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
14446 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
14447 let end = name.len() - 1; // skip trailing ")"
14448 Some(&name[start..end])
14449 } else {
14450 Option::None
14451 };
14452 if let Some(inner_str) = inner {
14453 // Parse the number part, ignoring BYTE/CHAR qualifier
14454 let num_str = inner_str.split_whitespace().next().unwrap_or("");
14455 if let Ok(n) = num_str.parse::<u32>() {
14456 Ok(Expression::DataType(DataType::VarChar { length: Some(n), parenthesized_length: false }))
14457 } else {
14458 Ok(e)
14459 }
14460 } else {
14461 // Plain VARCHAR2 / NVARCHAR2 without parens
14462 Ok(Expression::DataType(DataType::VarChar { length: Option::None, parenthesized_length: false }))
14463 }
14464 } else {
14465 Ok(e)
14466 }
14467 }
14468
14469 }
14470 })
14471 }
14472
14473 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
14474 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
14475 use crate::expressions::DataType;
14476 match dt {
14477 DataType::VarChar { .. } | DataType::Char { .. } => true,
14478 DataType::Struct { fields, .. } => fields.iter().any(|f| Self::has_varchar_char_type(&f.data_type)),
14479 _ => false,
14480 }
14481 }
14482
14483 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
14484 fn normalize_varchar_to_string(dt: crate::expressions::DataType) -> crate::expressions::DataType {
14485 use crate::expressions::DataType;
14486 match dt {
14487 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom { name: "STRING".to_string() },
14488 DataType::Struct { fields, nested } => {
14489 let fields = fields.into_iter().map(|mut f| {
14490 f.data_type = Self::normalize_varchar_to_string(f.data_type);
14491 f
14492 }).collect();
14493 DataType::Struct { fields, nested }
14494 }
14495 other => other,
14496 }
14497 }
14498
14499 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
14500 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
14501 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
14502 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
14503 let trimmed = s.trim();
14504
14505 // Find where digits end and unit text begins
14506 let digit_end = trimmed.find(|c: char| !c.is_ascii_digit()).unwrap_or(trimmed.len());
14507 if digit_end == 0 || digit_end == trimmed.len() {
14508 return expr;
14509 }
14510 let num = &trimmed[..digit_end];
14511 let unit_text = trimmed[digit_end..].trim().to_uppercase();
14512 if unit_text.is_empty() {
14513 return expr;
14514 }
14515
14516 let known_units = ["DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS"];
14517 if !known_units.contains(&unit_text.as_str()) {
14518 return expr;
14519 }
14520
14521 let unit_str = unit_text.clone();
14522 // Singularize
14523 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
14524 &unit_str[..unit_str.len()-1]
14525 } else {
14526 &unit_str
14527 };
14528 let unit = unit_singular;
14529
14530 match target {
14531 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14532 // INTERVAL '2' DAY
14533 let iu = match unit {
14534 "DAY" => crate::expressions::IntervalUnit::Day,
14535 "HOUR" => crate::expressions::IntervalUnit::Hour,
14536 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14537 "SECOND" => crate::expressions::IntervalUnit::Second,
14538 "WEEK" => crate::expressions::IntervalUnit::Week,
14539 "MONTH" => crate::expressions::IntervalUnit::Month,
14540 "YEAR" => crate::expressions::IntervalUnit::Year,
14541 _ => return expr,
14542 };
14543 return Expression::Interval(Box::new(crate::expressions::Interval {
14544 this: Some(Expression::string(num)),
14545 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14546 unit: iu,
14547 use_plural: false,
14548 }),
14549 }));
14550 }
14551 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
14552 // INTERVAL '2 DAYS'
14553 let plural = if num != "1" && !unit_str.ends_with('S') {
14554 format!("{} {}S", num, unit)
14555 } else if unit_str.ends_with('S') {
14556 format!("{} {}", num, unit_str)
14557 } else {
14558 format!("{} {}", num, unit)
14559 };
14560 return Expression::Interval(Box::new(crate::expressions::Interval {
14561 this: Some(Expression::string(&plural)),
14562 unit: None,
14563 }));
14564 }
14565 _ => {
14566 // Spark/Databricks/Hive: INTERVAL '1' DAY
14567 let iu = match unit {
14568 "DAY" => crate::expressions::IntervalUnit::Day,
14569 "HOUR" => crate::expressions::IntervalUnit::Hour,
14570 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14571 "SECOND" => crate::expressions::IntervalUnit::Second,
14572 "WEEK" => crate::expressions::IntervalUnit::Week,
14573 "MONTH" => crate::expressions::IntervalUnit::Month,
14574 "YEAR" => crate::expressions::IntervalUnit::Year,
14575 _ => return expr,
14576 };
14577 return Expression::Interval(Box::new(crate::expressions::Interval {
14578 this: Some(Expression::string(num)),
14579 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14580 unit: iu,
14581 use_plural: false,
14582 }),
14583 }));
14584 }
14585 }
14586 }
14587 // If it's already an INTERVAL expression, pass through
14588 expr
14589 }
14590
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `None` when no SELECT expression contains an UNNEST, or when the
    /// target dialect is not one of BigQuery/Presto/Trino/Snowflake (the two
    /// inner `match target` blocks fall through to `return None` for other
    /// targets), meaning the caller should leave the SELECT unchanged.
    fn rewrite_unnest_expansion(select: &crate::expressions::Select, target: DialectType) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind,
            Literal, UnnestFunc,
        };

        // Position sequences are 1-based for Presto/Trino (SEQUENCE(1, n)),
        // 0-based for the other targets (GENERATE_ARRAY(0, n-1) etc.).
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells conditional selection IFF; the rest use IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Dialect-specific array-length function used for GREATEST(...) and
        // the per-array WHERE bounds below.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake qualify columns with table aliases (_u, _u_2, ...);
        // BigQuery references the unnested column/offset names directly.
        let use_table_aliases = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake);
        // BigQuery/Snowflake take an explicit NULL as the IF/IFF else-branch.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build either a table-qualified column (tbl.name) or a bare identifier.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // All generated joins are plain CROSS JOINs with no condition.
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array argument of UNNEST(...)
            col_alias: String,               // generated column alias ("col", "col_2", ...)
            pos_alias: String,               // generated position alias ("pos_2", "pos_3", ...)
            source_alias: String,            // generated table alias ("_u_2", "_u_3", ...)
            original_expr: Expression,       // full original SELECT expression (may wrap UNNEST)
            has_outer_alias: Option<String>, // explicit `AS name` on the original expression, if any
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        // Counters start so the first item yields "col" / "pos_2" / "_u_2";
        // the bare "pos" / "_u" names are reserved for the shared series source.
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Find the array argument of an UNNEST nested anywhere inside the
        // expression: direct Unnest node, UNNEST(...) function call, behind an
        // alias, or inside one side of +,-,*,/ arithmetic.
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() => {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level `expr AS name` alias, if present.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        // Scan the SELECT list and record one UnnestInfo per UNNEST-bearing item.
        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First column is plain "col"; later ones get numeric suffixes.
                let col_alias = if col_counter == 1 { "col".to_string() } else { format!("col_{}", col_counter) };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // Nothing to rewrite: signal the caller to keep the SELECT as-is.
        if unnest_infos.is_empty() {
            return None;
        }

        // Shared position-series source: aliased "_u" with column "pos".
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases { Some(series_source_alias.as_str()) } else { None };

        // Build new SELECT expressions:
        // IF(pos = pos_N, col_N [, NULL]) AS <name>, with the IF spliced back
        // into the original expression in place of the UNNEST call.
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Prefer the user's explicit alias over the generated one.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr = Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Replace the UNNEST inside the original expression (which may be
            // arithmetic around it) with the IF expression.
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST — the series must cover
        // the longest of all unnested arrays.
        let size_exprs: Vec<Expression> = unnest_infos.iter().map(|info| {
            Expression::Function(Box::new(Function::new(array_length_func.to_string(), vec![info.arr_expr.clone()])))
        }).collect();

        let greatest = Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based targets end the series at GREATEST(...) - 1; 1-based targets
        // end at GREATEST(...) itself.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(greatest, Expression::Literal(Literal::Number("1".to_string())))))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, GREATEST(...) - 1))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![Expression::Literal(Literal::Number("0".to_string())), series_end],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, GREATEST(...)))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![Expression::Literal(Literal::Number("1".to_string())), series_end],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(...) - 1) + 1)))
                // ARRAY_GENERATE_RANGE's end bound is exclusive, hence the +1
                // around the parenthesized 0-based series end.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren { this: series_end, trailing_comments: Vec::new() })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![Expression::Literal(Literal::Number("0".to_string())), range_end],
                )));
                let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                    name: Identifier::new("INPUT".to_string()),
                    value: gen_range,
                    separator: crate::expressions::NamedArgSeparator::DArrow,
                }));
                let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Unsupported target: abandon the rewrite entirely.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake FLATTEN emits six fixed columns; "pos" takes the
            // position slot (index), the rest are boilerplate names.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()), Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()), Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            // BigQuery: UNNEST(...) AS pos — no table alias needed.
            Expression::Alias(Box::new(Alias::new(series_unnest_expr, Identifier::new(series_alias.clone()))))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    // — position and value land in FLATTEN's index/value slots.
                    let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: info.arr_expr.clone(),
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                    let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
                    let table_fn = Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()), Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()), Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause: for each array, keep rows where the shared series
        // position lines up with this array's position, OR (the series has run
        // past this array AND this array is pinned at its last position).
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(), vec![info.arr_expr.clone()],
            )));

            // Last valid position: size - 1 for 0-based targets, size for 1-based.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(arr_size, Expression::Literal(Literal::Number("1".to_string()))))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            // pos = pos_N OR (pos > last AND pos_N = last)
            let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren { this: and_cond, trailing_comments: Vec::new() }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| Expression::Paren(Box::new(crate::expressions::Paren { this: e, trailing_comments: Vec::new() }));
            let mut iter = where_conditions.into_iter();
            // len >= 2 here, so taking two items is safe.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        if new_select.from.is_some() {
            // Existing FROM: the series source and unnest sources all become
            // CROSS JOINs appended after any existing joins.
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            // No FROM: the series source becomes the FROM itself.
            new_select.from = Some(From { expressions: vec![series_alias_expr] });
            new_select.joins.extend(joins);
        }

        // AND the synthesized position conditions onto any existing WHERE.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(existing_where.this.clone(), where_expr)));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
14949
14950 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
14951 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
14952 match original {
14953 Expression::Unnest(_) => replacement.clone(),
14954 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
14955 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
14956 Expression::Add(op) => {
14957 let left = Self::replace_unnest_with_if(&op.left, replacement);
14958 let right = Self::replace_unnest_with_if(&op.right, replacement);
14959 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
14960 }
14961 Expression::Sub(op) => {
14962 let left = Self::replace_unnest_with_if(&op.left, replacement);
14963 let right = Self::replace_unnest_with_if(&op.right, replacement);
14964 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
14965 }
14966 Expression::Mul(op) => {
14967 let left = Self::replace_unnest_with_if(&op.left, replacement);
14968 let right = Self::replace_unnest_with_if(&op.right, replacement);
14969 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
14970 }
14971 Expression::Div(op) => {
14972 let left = Self::replace_unnest_with_if(&op.left, replacement);
14973 let right = Self::replace_unnest_with_if(&op.right, replacement);
14974 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
14975 }
14976 _ => original.clone(),
14977 }
14978 }
14979
14980 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
14981 /// or PostgreSQL #temp -> TEMPORARY.
14982 /// Also strips # from INSERT INTO #table for non-TSQL targets.
14983 fn transform_select_into(expr: Expression, _source: DialectType, target: DialectType) -> Expression {
14984 use crate::expressions::{CreateTable, Expression, TableRef};
14985
14986 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
14987 if let Expression::Insert(ref insert) = expr {
14988 if insert.table.name.name.starts_with('#') && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
14989 let mut new_insert = insert.clone();
14990 new_insert.table.name.name = insert.table.name.name.trim_start_matches('#').to_string();
14991 return Expression::Insert(new_insert);
14992 }
14993 return expr;
14994 }
14995
14996 if let Expression::Select(ref select) = expr {
14997 if let Some(ref into) = select.into {
14998 let table_name_raw = match &into.this {
14999 Expression::Table(tr) => tr.name.name.clone(),
15000 Expression::Identifier(id) => id.name.clone(),
15001 _ => String::new(),
15002 };
15003 let is_temp = table_name_raw.starts_with('#') || into.temporary;
15004 let clean_name = table_name_raw.trim_start_matches('#').to_string();
15005
15006 match target {
15007 DialectType::DuckDB | DialectType::Snowflake => {
15008 // SELECT INTO -> CREATE TABLE AS SELECT
15009 let mut new_select = select.clone();
15010 new_select.into = None;
15011 let ct = CreateTable {
15012 name: TableRef::new(clean_name),
15013 on_cluster: None,
15014 columns: Vec::new(),
15015 constraints: Vec::new(),
15016 if_not_exists: false,
15017 temporary: is_temp,
15018 or_replace: false,
15019 table_modifier: None,
15020 as_select: Some(Expression::Select(new_select)),
15021 as_select_parenthesized: false,
15022 on_commit: None,
15023 clone_source: None,
15024 clone_at_clause: None,
15025 shallow_clone: false, is_copy: false,
15026 leading_comments: Vec::new(),
15027 with_properties: Vec::new(),
15028 teradata_post_name_options: Vec::new(),
15029 with_data: None,
15030 with_statistics: None,
15031 teradata_indexes: Vec::new(),
15032 with_cte: None,
15033 properties: Vec::new(),
15034 partition_of: None,
15035 post_table_properties: Vec::new(),
15036 mysql_table_options: Vec::new(),
15037 inherits: Vec::new(),
15038 on_property: None,
15039 copy_grants: false,
15040 using_template: None,
15041 rollup: None,
15042 };
15043 return Expression::CreateTable(Box::new(ct));
15044 }
15045 DialectType::PostgreSQL | DialectType::Redshift => {
15046 // PostgreSQL: #foo -> INTO TEMPORARY foo
15047 if is_temp && !into.temporary {
15048 let mut new_select = select.clone();
15049 let mut new_into = into.clone();
15050 new_into.temporary = true;
15051 new_into.unlogged = false;
15052 new_into.this = Expression::Table(TableRef::new(clean_name));
15053 new_select.into = Some(new_into);
15054 Expression::Select(new_select)
15055 } else {
15056 expr
15057 }
15058 }
15059 _ => expr,
15060 }
15061 } else {
15062 expr
15063 }
15064 } else {
15065 expr
15066 }
15067 }
15068
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place in two phases:
    ///
    /// 1. Drains Presto-style `WITH (key = value)` pairs from `ct.with_properties`,
    ///    classifies each as FORMAT / PARTITIONED_BY / other, and re-emits them in
    ///    the target's native form: WITH pairs (Presto/Trino/Athena), STORED AS or
    ///    USING plus TBLPROPERTIES (Hive, Spark/Databricks), dropped entirely
    ///    (DuckDB), or passed through unchanged (any other target).
    /// 2. For Presto-family or DuckDB targets, rewrites Hive/Spark-style entries in
    ///    `ct.properties` (STORED AS, TBLPROPERTIES, PARTITIONED BY) into WITH
    ///    pairs (Presto) or strips them (DuckDB). For all other targets it only
    ///    unquotes `STORED AS 'PARQUET'` into `STORED AS PARQUET`.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression.
        // Used when re-emitting WITH values inside TBLPROPERTIES for Hive/Spark.
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len()-1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                // NOTE(review): naive comma split — elements containing commas
                // inside quotes would be mis-split; assumed fine for column-name arrays.
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Fast exit: nothing to translate in either representation.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // ---- Phase 1: handle Presto-style WITH properties ----
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties.
            // `other_props` preserves the original ordering of everything that is
            // neither FORMAT nor PARTITIONED_BY.
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner.split(',').map(|c| c.trim().trim_matches('"').trim_matches('\'')).collect();
                            let array_val = format!("ARRAY[{}]", cols.iter().map(|c| format!("'{}'", c)).collect::<Vec<_>>().join(", "));
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        // hive_format = Some(true) makes the generator emit STORED AS.
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral { value: true }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // ---- Phase 2 ----
        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena);
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild ct.properties, converting or dropping entries as we go.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format expression: keep the property untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties.push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression;
                                    // unsupported shapes are silently skipped.
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => format!("'{}'", s),
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types).
                            // Non-Tuple payloads are dropped along with the property.
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns in the regular columns).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!("ARRAY[{}]",
                                        col_names.iter().map(|n| format!("'{}'", n)).collect::<Vec<_>>().join(", "));
                                    ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // Presto keeps any other property; DuckDB drops everything.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr = Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
15347
15348 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
15349 fn apply_partitioned_by(ct: &mut crate::expressions::CreateTable, partitioned_by_value: &str, target: DialectType) {
15350 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
15351
15352 // Parse the ARRAY['col1', 'col2'] value to extract column names
15353 let mut col_names: Vec<String> = Vec::new();
15354 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
15355 let inner = partitioned_by_value
15356 .trim()
15357 .trim_start_matches("ARRAY")
15358 .trim_start_matches('[')
15359 .trim_start_matches('(')
15360 .trim_end_matches(']')
15361 .trim_end_matches(')');
15362 for part in inner.split(',') {
15363 let col = part.trim().trim_matches('\'').trim_matches('"');
15364 if !col.is_empty() {
15365 col_names.push(col.to_string());
15366 }
15367 }
15368
15369 if col_names.is_empty() {
15370 return;
15371 }
15372
15373 if matches!(target, DialectType::Hive) {
15374 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
15375 let mut partition_col_defs = Vec::new();
15376 for col_name in &col_names {
15377 // Find and remove from columns
15378 if let Some(pos) = ct.columns.iter().position(|c| c.name.name.eq_ignore_ascii_case(col_name)) {
15379 let col_def = ct.columns.remove(pos);
15380 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
15381 }
15382 }
15383 if !partition_col_defs.is_empty() {
15384 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15385 PartitionedByProperty {
15386 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_col_defs }))),
15387 },
15388 )));
15389 }
15390 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
15391 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
15392 // Use quoted identifiers to match the quoting style of the original column definitions
15393 let partition_exprs: Vec<Expression> = col_names
15394 .iter()
15395 .map(|name| {
15396 // Check if the column exists in the column list and use its quoting
15397 let is_quoted = ct.columns.iter().any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
15398 let ident = if is_quoted { Identifier::quoted(name.clone()) } else { Identifier::new(name.clone()) };
15399 Expression::Column(Column {
15400 name: ident,
15401 table: None,
15402 join_mark: false,
15403 trailing_comments: Vec::new(),
15404 })
15405 })
15406 .collect();
15407 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15408 PartitionedByProperty {
15409 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_exprs }))),
15410 },
15411 )));
15412 }
15413 // DuckDB: strip partitioned_by entirely (already handled)
15414 }
15415
15416 /// Convert a DataType to Spark's type string format (using angle brackets)
15417 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
15418 use crate::expressions::DataType;
15419 match dt {
15420 DataType::Int { .. } => "INT".to_string(),
15421 DataType::BigInt { .. } => "BIGINT".to_string(),
15422 DataType::SmallInt { .. } => "SMALLINT".to_string(),
15423 DataType::TinyInt { .. } => "TINYINT".to_string(),
15424 DataType::Float { .. } => "FLOAT".to_string(),
15425 DataType::Double { .. } => "DOUBLE".to_string(),
15426 DataType::Decimal { precision: Some(p), scale: Some(s) } => format!("DECIMAL({}, {})", p, s),
15427 DataType::Decimal { precision: Some(p), .. } => format!("DECIMAL({})", p),
15428 DataType::Decimal { .. } => "DECIMAL".to_string(),
15429 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "STRING".to_string(),
15430 DataType::Char { .. } => "STRING".to_string(),
15431 DataType::Boolean => "BOOLEAN".to_string(),
15432 DataType::Date => "DATE".to_string(),
15433 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
15434 DataType::Json | DataType::JsonB => "STRING".to_string(),
15435 DataType::Binary { .. } => "BINARY".to_string(),
15436 DataType::Array { element_type, .. } => format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type)),
15437 DataType::Map { key_type, value_type } => format!("MAP<{}, {}>", Self::data_type_to_spark_string(key_type), Self::data_type_to_spark_string(value_type)),
15438 DataType::Struct { fields, .. } => {
15439 let field_strs: Vec<String> = fields.iter().map(|f| {
15440 if f.name.is_empty() {
15441 Self::data_type_to_spark_string(&f.data_type)
15442 } else {
15443 format!("{}: {}", f.name, Self::data_type_to_spark_string(&f.data_type))
15444 }
15445 }).collect();
15446 format!("STRUCT<{}>", field_strs.join(", "))
15447 }
15448 DataType::Custom { name } => name.clone(),
15449 _ => format!("{:?}", dt),
15450 }
15451 }
15452
15453 /// Extract value and unit from an Interval expression
15454 /// Returns (value_expression, IntervalUnit)
15455 fn extract_interval_parts(interval_expr: &Expression) -> (Expression, crate::expressions::IntervalUnit) {
15456 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
15457
15458 if let Expression::Interval(iv) = interval_expr {
15459 let val = iv.this.clone().unwrap_or(Expression::number(0));
15460 let unit = match &iv.unit {
15461 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
15462 None => {
15463 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
15464 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
15465 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
15466 if parts.len() == 2 {
15467 let unit_str = parts[1].trim().to_uppercase();
15468 let parsed_unit = match unit_str.as_str() {
15469 "YEAR" | "YEARS" => IntervalUnit::Year,
15470 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
15471 "MONTH" | "MONTHS" => IntervalUnit::Month,
15472 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
15473 "DAY" | "DAYS" => IntervalUnit::Day,
15474 "HOUR" | "HOURS" => IntervalUnit::Hour,
15475 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
15476 "SECOND" | "SECONDS" => IntervalUnit::Second,
15477 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
15478 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
15479 _ => IntervalUnit::Day,
15480 };
15481 // Return just the numeric part as value and parsed unit
15482 return (Expression::Literal(crate::expressions::Literal::String(parts[0].to_string())), parsed_unit);
15483 }
15484 IntervalUnit::Day
15485 } else {
15486 IntervalUnit::Day
15487 }
15488 }
15489 _ => IntervalUnit::Day,
15490 };
15491 (val, unit)
15492 } else {
15493 // Not an interval - pass through
15494 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
15495 }
15496 }
15497
15498 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
15499 fn normalize_bigquery_function(e: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
15500 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
15501
15502 let f = if let Expression::Function(f) = e { *f } else { return Ok(e); };
15503 let name = f.name.to_uppercase();
15504 let mut args = f.args;
15505
15506 /// Helper to extract unit string from an identifier, column, or literal expression
15507 fn get_unit_str(expr: &Expression) -> String {
15508 match expr {
15509 Expression::Identifier(id) => id.name.to_uppercase(),
15510 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
15511 Expression::Column(col) => col.name.name.to_uppercase(),
15512 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
15513 Expression::Function(f) => {
15514 let base = f.name.to_uppercase();
15515 if !f.args.is_empty() {
15516 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
15517 let inner = get_unit_str(&f.args[0]);
15518 format!("{}({})", base, inner)
15519 } else {
15520 base
15521 }
15522 }
15523 _ => "DAY".to_string(),
15524 }
15525 }
15526
15527 /// Parse unit string to IntervalUnit
15528 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
15529 match s {
15530 "YEAR" => crate::expressions::IntervalUnit::Year,
15531 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
15532 "MONTH" => crate::expressions::IntervalUnit::Month,
15533 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
15534 "DAY" => crate::expressions::IntervalUnit::Day,
15535 "HOUR" => crate::expressions::IntervalUnit::Hour,
15536 "MINUTE" => crate::expressions::IntervalUnit::Minute,
15537 "SECOND" => crate::expressions::IntervalUnit::Second,
15538 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
15539 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
15540 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
15541 _ => crate::expressions::IntervalUnit::Day,
15542 }
15543 }
15544
15545 match name.as_str() {
15546 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
15547 // (BigQuery: result = date1 - date2, Standard: result = end - start)
15548 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
15549 let date1 = args.remove(0);
15550 let date2 = args.remove(0);
15551 let unit_expr = args.remove(0);
15552 let unit_str = get_unit_str(&unit_expr);
15553
15554 if matches!(target, DialectType::BigQuery) {
15555 // BigQuery -> BigQuery: just uppercase the unit
15556 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
15557 return Ok(Expression::Function(Box::new(Function::new(
15558 f.name, vec![date1, date2, unit],
15559 ))));
15560 }
15561
15562 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
15563 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
15564 if matches!(target, DialectType::Snowflake) {
15565 return Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
15566 this: Box::new(date2),
15567 expression: Box::new(date1),
15568 unit: Some(unit_str),
15569 })));
15570 }
15571
15572 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
15573 if matches!(target, DialectType::DuckDB) {
15574 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
15575 // CAST to TIME
15576 let cast_fn = |e: Expression| -> Expression {
15577 match e {
15578 Expression::Literal(Literal::String(s)) => {
15579 Expression::Cast(Box::new(Cast {
15580 this: Expression::Literal(Literal::String(s)),
15581 to: DataType::Custom { name: "TIME".to_string() },
15582 trailing_comments: vec![],
15583 double_colon_syntax: false,
15584 format: None,
15585 default: None,
15586 }))
15587 }
15588 other => other,
15589 }
15590 };
15591 (cast_fn(date1), cast_fn(date2))
15592 } else if name == "DATETIME_DIFF" {
15593 // CAST to TIMESTAMP
15594 (Self::ensure_cast_timestamp(date1), Self::ensure_cast_timestamp(date2))
15595 } else {
15596 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
15597 (Self::ensure_cast_timestamptz(date1), Self::ensure_cast_timestamptz(date2))
15598 };
15599 return Ok(Expression::Function(Box::new(Function::new(
15600 "DATE_DIFF".to_string(), vec![
15601 Expression::Literal(Literal::String(unit_str)),
15602 cast_d2,
15603 cast_d1,
15604 ],
15605 ))));
15606 }
15607
15608 // Convert to standard TIMESTAMPDIFF(unit, start, end)
15609 let unit = Expression::Identifier(Identifier::new(unit_str));
15610 Ok(Expression::Function(Box::new(Function::new(
15611 "TIMESTAMPDIFF".to_string(), vec![unit, date2, date1],
15612 ))))
15613 }
15614
15615 // DATEDIFF(unit, start, end) -> target-specific form
15616 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
15617 "DATEDIFF" if args.len() == 3 => {
15618 let arg0 = args.remove(0);
15619 let arg1 = args.remove(0);
15620 let arg2 = args.remove(0);
15621 let unit_str = get_unit_str(&arg0);
15622
15623 // Redshift DATEDIFF(unit, start, end) order: result = end - start
15624 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
15625 // TSQL DATEDIFF(unit, start, end) order: result = end - start
15626
15627 if matches!(target, DialectType::Snowflake) {
15628 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
15629 let unit = Expression::Identifier(Identifier::new(unit_str));
15630 return Ok(Expression::Function(Box::new(Function::new(
15631 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15632 ))));
15633 }
15634
15635 if matches!(target, DialectType::DuckDB) {
15636 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
15637 let cast_d1 = Self::ensure_cast_timestamp(arg1);
15638 let cast_d2 = Self::ensure_cast_timestamp(arg2);
15639 return Ok(Expression::Function(Box::new(Function::new(
15640 "DATE_DIFF".to_string(), vec![
15641 Expression::Literal(Literal::String(unit_str)),
15642 cast_d1,
15643 cast_d2,
15644 ],
15645 ))));
15646 }
15647
15648 if matches!(target, DialectType::BigQuery) {
15649 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
15650 let cast_d1 = Self::ensure_cast_datetime(arg1);
15651 let cast_d2 = Self::ensure_cast_datetime(arg2);
15652 let unit = Expression::Identifier(Identifier::new(unit_str));
15653 return Ok(Expression::Function(Box::new(Function::new(
15654 "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
15655 ))));
15656 }
15657
15658 if matches!(target, DialectType::Spark | DialectType::Databricks) {
15659 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
15660 let unit = Expression::Identifier(Identifier::new(unit_str));
15661 return Ok(Expression::Function(Box::new(Function::new(
15662 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15663 ))));
15664 }
15665
15666 if matches!(target, DialectType::Hive) {
15667 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
15668 match unit_str.as_str() {
15669 "MONTH" => {
15670 return Ok(Expression::Function(Box::new(Function::new(
15671 "CAST".to_string(), vec![
15672 Expression::Function(Box::new(Function::new(
15673 "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
15674 ))),
15675 ],
15676 ))));
15677 }
15678 "WEEK" => {
15679 return Ok(Expression::Cast(Box::new(Cast {
15680 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
15681 Expression::Function(Box::new(Function::new(
15682 "DATEDIFF".to_string(), vec![arg2, arg1],
15683 ))),
15684 Expression::Literal(Literal::Number("7".to_string())),
15685 ))),
15686 to: DataType::Int { length: None, integer_spelling: false },
15687 trailing_comments: vec![],
15688 double_colon_syntax: false,
15689 format: None,
15690 default: None,
15691 })));
15692 }
15693 _ => {
15694 // Default: DATEDIFF(end, start) for DAY
15695 return Ok(Expression::Function(Box::new(Function::new(
15696 "DATEDIFF".to_string(), vec![arg2, arg1],
15697 ))));
15698 }
15699 }
15700 }
15701
15702 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
15703 // Presto/Trino: DATE_DIFF('UNIT', start, end)
15704 return Ok(Expression::Function(Box::new(Function::new(
15705 "DATE_DIFF".to_string(), vec![
15706 Expression::Literal(Literal::String(unit_str)),
15707 arg1,
15708 arg2,
15709 ],
15710 ))));
15711 }
15712
15713 if matches!(target, DialectType::TSQL) {
15714 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
15715 let cast_d2 = Self::ensure_cast_datetime2(arg2);
15716 let unit = Expression::Identifier(Identifier::new(unit_str));
15717 return Ok(Expression::Function(Box::new(Function::new(
15718 "DATEDIFF".to_string(), vec![unit, arg1, cast_d2],
15719 ))));
15720 }
15721
15722 if matches!(target, DialectType::PostgreSQL) {
15723 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
15724 // For now, use DATEDIFF (passthrough) with uppercased unit
15725 let unit = Expression::Identifier(Identifier::new(unit_str));
15726 return Ok(Expression::Function(Box::new(Function::new(
15727 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15728 ))));
15729 }
15730
15731 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
15732 let unit = Expression::Identifier(Identifier::new(unit_str));
15733 Ok(Expression::Function(Box::new(Function::new(
15734 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15735 ))))
15736 }
15737
            // DATE_DIFF(date1, date2, unit) -> standard form
            //
            // Consumes the three positional args (end date, start date, unit) and rewrites
            // the call into the target dialect's difference syntax. Note the semantics of
            // argument order differ per target: several targets take (start, end) or
            // (unit, end, start) — each branch below encodes one target's ordering.
            "DATE_DIFF" if args.len() == 3 => {
                let date1 = args.remove(0);
                let date2 = args.remove(0);
                let unit_expr = args.remove(0);
                // Normalized uppercase unit name, e.g. "DAY", "WEEK(SUNDAY)", "ISOWEEK".
                let unit_str = get_unit_str(&unit_expr);

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
                    // (BigQuery's default WEEK starts on Sunday, so the two spellings are equivalent).
                    let norm_unit = if unit_str == "WEEK(SUNDAY)" { "WEEK".to_string() } else { unit_str };
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    let unit = Expression::Identifier(Identifier::new(norm_unit));
                    // `f.name` keeps the original function spelling from the source SQL.
                    return Ok(Expression::Function(Box::new(Function::new(
                        f.name, vec![norm_d1, norm_d2, unit],
                    ))));
                }

                if matches!(target, DialectType::MySQL) {
                    // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
                    // (the requested unit is dropped; MySQL has no unit parameter here).
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(), vec![norm_d1, norm_d2],
                    ))));
                }

                if matches!(target, DialectType::StarRocks) {
                    // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(), vec![
                            Expression::Literal(Literal::String(unit_str)),
                            norm_d1,
                            norm_d2,
                        ],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
                    let norm_d1 = Self::ensure_cast_date(date1);
                    let norm_d2 = Self::ensure_cast_date(date2);

                    // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
                    let is_week_variant = unit_str == "WEEK" || unit_str.starts_with("WEEK(") || unit_str == "ISOWEEK";
                    if is_week_variant {
                        // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
                        // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
                        // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
                        // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
                        //
                        // The offset shifts each date so DuckDB's Monday-based DATE_TRUNC('WEEK')
                        // lands on the requested week-start day before counting boundaries.
                        let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                            None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
                        } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                            Some("1") // Shift Sunday to Monday alignment
                        } else if unit_str == "WEEK(SATURDAY)" {
                            Some("-5")
                        } else if unit_str == "WEEK(TUESDAY)" {
                            Some("-1")
                        } else if unit_str == "WEEK(WEDNESDAY)" {
                            Some("-2")
                        } else if unit_str == "WEEK(THURSDAY)" {
                            Some("-3")
                        } else if unit_str == "WEEK(FRIDAY)" {
                            Some("-4")
                        } else {
                            Some("1") // default to Sunday
                        };

                        // Builds DATE_TRUNC('WEEK', date [+ INTERVAL 'offset' DAY]).
                        let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                            let shifted = if let Some(off) = offset {
                                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(Expression::Literal(Literal::String(off.to_string()))),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                                }));
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval)))
                            } else {
                                date
                            };
                            Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String("WEEK".to_string())), shifted],
                            )))
                        };

                        // d2 (start) comes first for DuckDB so the sign matches BigQuery's d1 - d2.
                        let trunc_d2 = make_trunc(norm_d2, day_offset);
                        let trunc_d1 = make_trunc(norm_d1, day_offset);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::Literal(Literal::String("WEEK".to_string())),
                                trunc_d2,
                                trunc_d1,
                            ],
                        ))));
                    }

                    // Non-week units: plain DATE_DIFF('UNIT', d2, d1) with swapped args.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(), vec![
                            Expression::Literal(Literal::String(unit_str)),
                            norm_d2,
                            norm_d1,
                        ],
                    ))));
                }

                // Default: DATEDIFF(unit, date2, date1)
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(), vec![unit, date2, date1],
                ))))
            }
15849
            // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
            //
            // Also handles DATETIME_ADD and TIME_ADD; `name` distinguishes which of the
            // three was called, and several branches below cast differently based on it.
            "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                // Decompose INTERVAL n UNIT into its amount (`val`) and unit.
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
                        // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
                        // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                            this: Box::new(val),
                            expression: Box::new(cast_ts),
                            unit: Some(unit_str),
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                            // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                            }));
                            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(ts, interval))))
                        } else if name == "DATETIME_ADD" && matches!(target, DialectType::Databricks) {
                            // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                            ))))
                        } else {
                            // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                            // Covers TIMESTAMP_ADD / TIME_ADD for both Spark and Databricks.
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let cast_ts = if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                                Self::maybe_cast_ts(ts)
                            } else {
                                // TIME_ADD: operand left uncast.
                                ts
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), val, cast_ts],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                                    let unwrapped = match ts {
                                        Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                                }
                            }
                        } else {
                            ts
                        };
                        // DateAdd expression renders as MySQL's DATE_ADD(ts, INTERVAL val UNIT).
                        Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                            this: mysql_ts,
                            interval: val,
                            unit,
                        })))
                    }
                    _ => {
                        // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_ADD" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                            this: cast_ts,
                            interval: val,
                            unit,
                        })))
                    }
                }
            }
15946
            // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
            //
            // Mirror of the *_ADD arm above: subtraction is expressed either as a
            // negated TIMESTAMPADD amount, a `- INTERVAL` binary op, or a DateSub node,
            // depending on what the target dialect's generator expects.
            "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        // Negate by multiplying with -1 so arbitrary expressions (not just
                        // literals) are handled.
                        let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            val,
                            Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
                        )));
                        Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                            this: Box::new(neg_val),
                            expression: Box::new(cast_ts),
                            unit: Some(unit_str),
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                            || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
                        {
                            // Spark: ts - INTERVAL val UNIT
                            let cast_ts = if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts(ts)
                            } else {
                                ts
                            };
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                            }));
                            Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_ts, interval))))
                        } else {
                            // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                            // (also reached for TIME_SUB targeting Spark)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                val,
                                Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), neg_val, ts],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                                    let unwrapped = match ts {
                                        Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                            this: mysql_ts,
                            interval: val,
                            unit,
                        })))
                    }
                    _ => {
                        // DuckDB gets explicit casts; everything else passes the operand through.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_SUB" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                            this: cast_ts,
                            interval: val,
                            unit,
                        })))
                    }
                }
            }
16041
16042 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
16043 "DATE_SUB" if args.len() == 2 => {
16044 let date = args.remove(0);
16045 let interval_expr = args.remove(0);
16046 let (val, unit) = Self::extract_interval_parts(&interval_expr);
16047
16048 match target {
16049 DialectType::Databricks | DialectType::Spark => {
16050 // Databricks/Spark: DATE_ADD(date, -val)
16051 // Use DateAdd expression with negative val so it generates correctly
16052 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
16053 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
16054 // Instead, we directly output as a simple negated DateSub
16055 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16056 this: date,
16057 interval: val,
16058 unit,
16059 })))
16060 }
16061 DialectType::DuckDB => {
16062 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
16063 let cast_date = Self::ensure_cast_date(date);
16064 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16065 this: Some(val),
16066 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
16067 }));
16068 Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
16069 }
16070 DialectType::Snowflake => {
16071 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
16072 // Just ensure the date is cast properly
16073 let cast_date = Self::ensure_cast_date(date);
16074 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16075 this: cast_date,
16076 interval: val,
16077 unit,
16078 })))
16079 }
16080 DialectType::PostgreSQL => {
16081 // PostgreSQL: date - INTERVAL 'val UNIT'
16082 let unit_str = Self::interval_unit_to_string(&unit);
16083 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16084 this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
16085 unit: None,
16086 }));
16087 Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(date, interval))))
16088 }
16089 _ => {
16090 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16091 this: date,
16092 interval: val,
16093 unit,
16094 })))
16095 }
16096 }
16097 }
16098
            // DATEADD(unit, val, date) -> target-specific form
            // Used by: Redshift, Snowflake, TSQL, ClickHouse
            "DATEADD" if args.len() == 3 => {
                let arg0 = args.remove(0); // unit
                let arg1 = args.remove(0); // amount to add
                let arg2 = args.remove(0); // date/timestamp operand
                let unit_str = get_unit_str(&arg0);

                if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
                    // Keep DATEADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
                    let date = if matches!(target, DialectType::TSQL)
                        && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(), vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL 'val' UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                    }));
                    let cast_date = Self::ensure_cast_timestamp(arg2);
                    return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))));
                }

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
                    // NOTE(review): this branch always emits DATE_ADD; BigQuery's DATE_ADD only
                    // accepts date parts (DAY and coarser), so time-grain units like HOUR may
                    // need TIMESTAMP_ADD/DATETIME_ADD — confirm against the generator.
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                    }));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![arg2, interval],
                    ))));
                }

                if matches!(target, DialectType::Databricks) {
                    // Databricks: keep DATEADD(UNIT, val, date) format
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(), vec![unit, arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::Spark) {
                    // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
                    //
                    // Helper: multiply the amount by a constant factor, folding at
                    // build time when the amount is a numeric literal, otherwise
                    // emitting `expr * factor`.
                    fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
                        if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                            if let Ok(val) = n.parse::<i64>() {
                                return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
                            }
                        }
                        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
                        )))
                    }
                    match unit_str.as_str() {
                        "YEAR" => {
                            // 1 year = 12 months
                            let months = multiply_expr_dateadd(arg1, 12);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![arg2, months],
                            ))));
                        }
                        "QUARTER" => {
                            // 1 quarter = 3 months
                            let months = multiply_expr_dateadd(arg1, 3);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![arg2, months],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![arg2, arg1],
                            ))));
                        }
                        "WEEK" => {
                            // 1 week = 7 days
                            let days = multiply_expr_dateadd(arg1, 7);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![arg2, days],
                            ))));
                        }
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                            let unit = Expression::Identifier(Identifier::new(unit_str));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                            ))));
                        }
                    }
                }

                if matches!(target, DialectType::Hive) {
                    // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
                    match unit_str.as_str() {
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(), vec![arg2, arg1],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(), vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            let iu = parse_interval_unit(&unit_str);
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                            }));
                            return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
                        }
                    }
                }

                if matches!(target, DialectType::PostgreSQL) {
                    // PostgreSQL: date + INTERVAL 'val UNIT' (single quoted interval string)
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&arg1), unit_str)))),
                        unit: None,
                    }));
                    return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
                }

                if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as a string literal
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![
                            Expression::Literal(Literal::String(unit_str)),
                            arg1,
                            arg2,
                        ],
                    ))));
                }

                if matches!(target, DialectType::ClickHouse) {
                    // ClickHouse: DATE_ADD(UNIT, val, date) - unit as a bare identifier
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                    ))));
                }

                // Default: keep DATEADD with uppercased unit
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(), vec![unit, arg1, arg2],
                ))))
            }
16261
16262 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
16263 "DATE_ADD" if args.len() == 3 => {
16264 let arg0 = args.remove(0);
16265 let arg1 = args.remove(0);
16266 let arg2 = args.remove(0);
16267 let unit_str = get_unit_str(&arg0);
16268
16269 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
16270 // Presto/Trino: DATE_ADD('UNIT', val, date)
16271 return Ok(Expression::Function(Box::new(Function::new(
16272 "DATE_ADD".to_string(), vec![
16273 Expression::Literal(Literal::String(unit_str)),
16274 arg1,
16275 arg2,
16276 ],
16277 ))));
16278 }
16279
16280 if matches!(target, DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift) {
16281 // DATEADD(UNIT, val, date)
16282 let unit = Expression::Identifier(Identifier::new(unit_str));
16283 let date = if matches!(target, DialectType::TSQL) {
16284 Self::ensure_cast_datetime2(arg2)
16285 } else {
16286 arg2
16287 };
16288 return Ok(Expression::Function(Box::new(Function::new(
16289 "DATEADD".to_string(), vec![unit, arg1, date],
16290 ))));
16291 }
16292
16293 if matches!(target, DialectType::DuckDB) {
16294 // DuckDB: date + INTERVAL val UNIT
16295 let iu = parse_interval_unit(&unit_str);
16296 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16297 this: Some(arg1),
16298 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
16299 }));
16300 return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
16301 }
16302
16303 if matches!(target, DialectType::Spark | DialectType::Databricks) {
16304 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
16305 let unit = Expression::Identifier(Identifier::new(unit_str));
16306 return Ok(Expression::Function(Box::new(Function::new(
16307 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
16308 ))));
16309 }
16310
16311 // Default: DATE_ADD(UNIT, val, date)
16312 let unit = Expression::Identifier(Identifier::new(unit_str));
16313 Ok(Expression::Function(Box::new(Function::new(
16314 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
16315 ))))
16316 }
16317
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                // Decompose the INTERVAL argument into its amount and unit.
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(quoted_val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT' (single quoted interval string)
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
                            unit: None,
                        }));
                        Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        // NOTE(review): the amount is stringified via expr_to_string and
                        // re-wrapped in CAST('…' AS BIGINT) — for non-literal amounts this
                        // may flatten the expression; confirm expr_to_string round-trips.
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![
                                Expression::Literal(Literal::String(unit_str)),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(val_str)),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY
                        match unit_str.as_str() {
                            "DAY" => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(), vec![date, val],
                                ))))
                            }
                            "MONTH" => {
                                // Month arithmetic uses the dedicated ADD_MONTHS builtin.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(), vec![date, val],
                                ))))
                            }
                            _ => {
                                // Other units: 2-arg DATE_ADD(date, INTERVAL val UNIT).
                                let iu = parse_interval_unit(&unit_str);
                                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(), vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        // NOTE(review): the amount is emitted as a string literal here
                        // (Snowflake coerces it); confirm this is intentional for
                        // non-literal amount expressions.
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Literal::String(val_str)),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val, cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(UNIT, val, date) - no cast needed
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val, date,
                            ],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(quoted_val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(quoted_val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(), vec![date, interval],
                        ))))
                    }
                    DialectType::Databricks => {
                        // Databricks: DATEADD(UNIT, val, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(), vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val, date,
                            ],
                        ))))
                    }
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                            this: date,
                            interval: val,
                            unit,
                        })))
                    }
                }
            }
16458
16459 // ADD_MONTHS(date, val) -> target-specific form
16460 "ADD_MONTHS" if args.len() == 2 => {
16461 let date = args.remove(0);
16462 let val = args.remove(0);
16463
16464 if matches!(target, DialectType::TSQL) {
16465 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
16466 let cast_date = Self::ensure_cast_datetime2(date);
16467 return Ok(Expression::Function(Box::new(Function::new(
16468 "DATEADD".to_string(), vec![
16469 Expression::Identifier(Identifier::new("MONTH")),
16470 val,
16471 cast_date,
16472 ],
16473 ))));
16474 }
16475
16476 if matches!(target, DialectType::DuckDB) {
16477 // DuckDB: date + INTERVAL val MONTH
16478 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16479 this: Some(val),
16480 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
16481 unit: crate::expressions::IntervalUnit::Month,
16482 use_plural: false,
16483 }),
16484 }));
16485 return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))));
16486 }
16487
16488 if matches!(target, DialectType::Snowflake) {
16489 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
16490 if matches!(source, DialectType::Snowflake) {
16491 return Ok(Expression::Function(Box::new(Function::new(
16492 "ADD_MONTHS".to_string(), vec![date, val],
16493 ))));
16494 }
16495 return Ok(Expression::Function(Box::new(Function::new(
16496 "DATEADD".to_string(), vec![
16497 Expression::Identifier(Identifier::new("MONTH")),
16498 val,
16499 date,
16500 ],
16501 ))));
16502 }
16503
16504 if matches!(target, DialectType::Spark | DialectType::Databricks) {
16505 // Spark: ADD_MONTHS(date, val) - keep as is
16506 return Ok(Expression::Function(Box::new(Function::new(
16507 "ADD_MONTHS".to_string(), vec![date, val],
16508 ))));
16509 }
16510
16511 if matches!(target, DialectType::Hive) {
16512 return Ok(Expression::Function(Box::new(Function::new(
16513 "ADD_MONTHS".to_string(), vec![date, val],
16514 ))));
16515 }
16516
16517 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
16518 // Presto: DATE_ADD('MONTH', val, date)
16519 return Ok(Expression::Function(Box::new(Function::new(
16520 "DATE_ADD".to_string(), vec![
16521 Expression::Literal(Literal::String("MONTH".to_string())),
16522 val,
16523 date,
16524 ],
16525 ))));
16526 }
16527
16528 // Default: keep ADD_MONTHS
16529 Ok(Expression::Function(Box::new(Function::new(
16530 "ADD_MONTHS".to_string(), vec![date, val],
16531 ))))
16532 }
16533
// SAFE_DIVIDE(x, y) -> target-specific form directly
// BigQuery's SAFE_DIVIDE returns NULL instead of erroring on division by
// zero; lower it to the target's conditional construct (CASE / IFF / IF).
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions, so the
    // generated `x / y` and `y <> 0` keep the intended precedence.
    // Bare columns/literals/identifiers never need the parens.
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
        _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
        _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
    };
    // Shared pieces: `y <> 0` guard and the plain `x / y` quotient.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(y_ref.clone(), Expression::number(0))));
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(x_ref.clone(), y_ref.clone())));

    match target {
        DialectType::DuckDB | DialectType::PostgreSQL => {
            // CASE WHEN y <> 0 THEN x / y ELSE NULL END
            let result_div = if matches!(target, DialectType::PostgreSQL) {
                // PostgreSQL integer division truncates, so force a
                // floating-point quotient by casting the numerator.
                let cast_x = Expression::Cast(Box::new(Cast {
                    this: x_ref,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)))
            } else {
                div_expr
            };
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(condition, result_div)],
                else_: Some(Expression::Null(crate::expressions::Null)),
            })))
        }
        DialectType::Snowflake => {
            // IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: Some("IFF".to_string()),
            })))
        }
        DialectType::Presto | DialectType::Trino => {
            // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL) — cast avoids
            // integer-division truncation, mirroring the PostgreSQL path.
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x_ref,
                to: DataType::Double { precision: None, scale: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let cast_div = Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
        _ => {
            // Default: IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
    }
}
16610
16611 // GENERATE_UUID() -> UUID() with CAST to string
16612 "GENERATE_UUID" => {
16613 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
16614 this: None,
16615 name: None,
16616 is_string: None,
16617 }));
16618 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
16619 let cast_type = match target {
16620 DialectType::DuckDB => Some(DataType::Text),
16621 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar { length: None, parenthesized_length: false }),
16622 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Some(DataType::String { length: None }),
16623 _ => None,
16624 };
16625 if let Some(dt) = cast_type {
16626 Ok(Expression::Cast(Box::new(Cast {
16627 this: uuid_expr,
16628 to: dt,
16629 trailing_comments: vec![],
16630 double_colon_syntax: false,
16631 format: None,
16632 default: None,
16633 })))
16634 } else {
16635 Ok(uuid_expr)
16636 }
16637 }
16638
16639 // COUNTIF(x) -> CountIf expression
16640 "COUNTIF" if args.len() == 1 => {
16641 let arg = args.remove(0);
16642 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
16643 this: arg,
16644 distinct: false,
16645 filter: None,
16646 order_by: vec![],
16647 name: None,
16648 ignore_nulls: None,
16649 having_max: None,
16650 limit: None,
16651 })))
16652 }
16653
16654 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
16655 "EDIT_DISTANCE" => {
16656 // Strip named arguments (max_distance => N) and pass as positional
16657 let mut positional_args: Vec<Expression> = vec![];
16658 for arg in args {
16659 match arg {
16660 Expression::NamedArgument(na) => {
16661 positional_args.push(na.value);
16662 }
16663 other => positional_args.push(other),
16664 }
16665 }
16666 if positional_args.len() >= 2 {
16667 let col1 = positional_args.remove(0);
16668 let col2 = positional_args.remove(0);
16669 let levenshtein = crate::expressions::BinaryFunc {
16670 this: col1,
16671 expression: col2,
16672 original_name: None,
16673 };
16674 // Pass extra args through a function wrapper with all args
16675 if !positional_args.is_empty() {
16676 let mut all_args = vec![levenshtein.this, levenshtein.expression];
16677 all_args.extend(positional_args);
16678 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
16679 let func_name = if matches!(target, DialectType::PostgreSQL) {
16680 "LEVENSHTEIN_LESS_EQUAL"
16681 } else {
16682 "LEVENSHTEIN"
16683 };
16684 return Ok(Expression::Function(Box::new(Function::new(
16685 func_name.to_string(), all_args,
16686 ))));
16687 }
16688 Ok(Expression::Levenshtein(Box::new(levenshtein)))
16689 } else {
16690 Ok(Expression::Function(Box::new(Function::new("EDIT_DISTANCE".to_string(), positional_args))))
16691 }
16692 }
16693
16694 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
16695 "TIMESTAMP_SECONDS" if args.len() == 1 => {
16696 let arg = args.remove(0);
16697 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16698 this: Box::new(arg),
16699 scale: Some(0),
16700 zone: None,
16701 hours: None,
16702 minutes: None,
16703 format: None,
16704 target_type: None,
16705 })))
16706 }
16707
16708 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
16709 "TIMESTAMP_MILLIS" if args.len() == 1 => {
16710 let arg = args.remove(0);
16711 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16712 this: Box::new(arg),
16713 scale: Some(3),
16714 zone: None,
16715 hours: None,
16716 minutes: None,
16717 format: None,
16718 target_type: None,
16719 })))
16720 }
16721
16722 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
16723 "TIMESTAMP_MICROS" if args.len() == 1 => {
16724 let arg = args.remove(0);
16725 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16726 this: Box::new(arg),
16727 scale: Some(6),
16728 zone: None,
16729 hours: None,
16730 minutes: None,
16731 format: None,
16732 target_type: None,
16733 })))
16734 }
16735
16736 // DIV(x, y) -> IntDiv expression
16737 "DIV" if args.len() == 2 => {
16738 let x = args.remove(0);
16739 let y = args.remove(0);
16740 Ok(Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
16741 this: x,
16742 expression: y,
16743 original_name: None,
16744 })))
16745 }
16746
16747 // TO_HEX(x) -> target-specific form
16748 "TO_HEX" if args.len() == 1 => {
16749 let arg = args.remove(0);
16750 // Check if inner function already returns hex string in certain targets
16751 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
16752 if matches!(target, DialectType::BigQuery) {
16753 // BQ->BQ: keep as TO_HEX
16754 Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg]))))
16755 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
16756 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
16757 Ok(arg)
16758 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
16759 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
16760 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
16761 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
16762 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
16763 if let Expression::Function(ref inner_f) = arg {
16764 let inner_args = inner_f.args.clone();
16765 let binary_func = match inner_f.name.to_uppercase().as_str() {
16766 "SHA1" => Expression::Function(Box::new(Function::new("SHA1_BINARY".to_string(), inner_args))),
16767 "MD5" => Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), inner_args))),
16768 "SHA256" => {
16769 let mut a = inner_args;
16770 a.push(Expression::number(256));
16771 Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
16772 }
16773 "SHA512" => {
16774 let mut a = inner_args;
16775 a.push(Expression::number(512));
16776 Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
16777 }
16778 _ => arg.clone(),
16779 };
16780 Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![binary_func]))))
16781 } else {
16782 let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
16783 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
16784 }
16785 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
16786 let inner = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg])));
16787 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
16788 } else {
16789 let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
16790 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
16791 }
16792 }
16793
// LAST_DAY(date, unit): BigQuery allows an explicit unit argument; the
// two-arg form is lowered to the common one-arg LAST_DAY(date).
// NOTE(review): the unit is dropped unconditionally, which is only correct
// when it is MONTH (the default). Other units (YEAR, WEEK, ...) would need
// a real transform — confirm callers never pass a non-MONTH unit here.
"LAST_DAY" if args.len() == 2 => {
    let date = args.remove(0);
    let _unit = args.remove(0); // Strip the unit (MONTH is default)
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(), vec![date],
    ))))
}
16802
16803 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
16804 "GENERATE_ARRAY" => {
16805 let start = args.get(0).cloned();
16806 let end = args.get(1).cloned();
16807 let step = args.get(2).cloned();
16808 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16809 start: start.map(Box::new),
16810 end: end.map(Box::new),
16811 step: step.map(Box::new),
16812 is_end_exclusive: None,
16813 })))
16814 }
16815
16816 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
16817 "GENERATE_TIMESTAMP_ARRAY" => {
16818 let start = args.get(0).cloned();
16819 let end = args.get(1).cloned();
16820 let step = args.get(2).cloned();
16821
16822 if matches!(target, DialectType::DuckDB) {
16823 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
16824 // Only cast string literals - leave columns/expressions as-is
16825 let maybe_cast_ts = |expr: Expression| -> Expression {
16826 if matches!(&expr, Expression::Literal(Literal::String(_))) {
16827 Expression::Cast(Box::new(Cast {
16828 this: expr,
16829 to: DataType::Timestamp { precision: None, timezone: false },
16830 trailing_comments: vec![],
16831 double_colon_syntax: false,
16832 format: None,
16833 default: None,
16834 }))
16835 } else {
16836 expr
16837 }
16838 };
16839 let cast_start = start.map(maybe_cast_ts);
16840 let cast_end = end.map(maybe_cast_ts);
16841 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16842 start: cast_start.map(Box::new),
16843 end: cast_end.map(Box::new),
16844 step: step.map(Box::new),
16845 is_end_exclusive: None,
16846 })))
16847 } else {
16848 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16849 start: start.map(Box::new),
16850 end: end.map(Box::new),
16851 step: step.map(Box::new),
16852 is_end_exclusive: None,
16853 })))
16854 }
16855 }
16856
// TO_JSON(x) -> target-specific (from Spark/Hive)
// Serializes a value to a JSON string; each target spells this differently.
// NOTE(review): on the Presto/Trino and DuckDB paths only the first argument
// is kept — any extra arguments (e.g. an options map) are silently dropped.
"TO_JSON" => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom { name: "JSON".to_string() },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
        }
        DialectType::BigQuery => {
            // BigQuery: TO_JSON_STRING(x) is its string-returning equivalent.
            Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT) — the outer cast pins the result to a
            // plain string type.
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // Default: pass the call through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
    }
}
16892
// TO_JSON_STRING(x) -> target-specific
// The inverse direction of the TO_JSON mapping: BigQuery's string-returning
// serializer rendered in each target's dialect.
// NOTE(review): on the Presto/Trino and DuckDB paths only the first argument
// is kept — extra arguments are silently dropped.
"TO_JSON_STRING" => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Spark-family spells it TO_JSON.
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom { name: "JSON".to_string() },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT) — the outer cast pins the result to a
            // plain string type.
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TO_JSON(x)
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        // Default: pass the call through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
    }
}
16932
16933 // SAFE_ADD(x, y) -> SafeAdd expression
16934 "SAFE_ADD" if args.len() == 2 => {
16935 let x = args.remove(0);
16936 let y = args.remove(0);
16937 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
16938 this: Box::new(x),
16939 expression: Box::new(y),
16940 })))
16941 }
16942
16943 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
16944 "SAFE_SUBTRACT" if args.len() == 2 => {
16945 let x = args.remove(0);
16946 let y = args.remove(0);
16947 Ok(Expression::SafeSubtract(Box::new(crate::expressions::SafeSubtract {
16948 this: Box::new(x),
16949 expression: Box::new(y),
16950 })))
16951 }
16952
16953 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
16954 "SAFE_MULTIPLY" if args.len() == 2 => {
16955 let x = args.remove(0);
16956 let y = args.remove(0);
16957 Ok(Expression::SafeMultiply(Box::new(crate::expressions::SafeMultiply {
16958 this: Box::new(x),
16959 expression: Box::new(y),
16960 })))
16961 }
16962
16963 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
16964 "REGEXP_CONTAINS" if args.len() == 2 => {
16965 let str_expr = args.remove(0);
16966 let pattern = args.remove(0);
16967 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
16968 this: str_expr,
16969 pattern,
16970 flags: None,
16971 })))
16972 }
16973
16974 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
16975 "CONTAINS_SUBSTR" if args.len() == 2 => {
16976 let a = args.remove(0);
16977 let b = args.remove(0);
16978 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
16979 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
16980 Ok(Expression::Function(Box::new(Function::new(
16981 "CONTAINS".to_string(), vec![lower_a, lower_b],
16982 ))))
16983 }
16984
16985 // INT64(x) -> CAST(x AS BIGINT)
16986 "INT64" if args.len() == 1 => {
16987 let arg = args.remove(0);
16988 Ok(Expression::Cast(Box::new(Cast {
16989 this: arg,
16990 to: DataType::BigInt { length: None },
16991 trailing_comments: vec![],
16992 double_colon_syntax: false,
16993 format: None,
16994 default: None,
16995 })))
16996 }
16997
16998 // INSTR(str, substr) -> target-specific
16999 "INSTR" if args.len() >= 2 => {
17000 let str_expr = args.remove(0);
17001 let substr = args.remove(0);
17002 if matches!(target, DialectType::Snowflake) {
17003 // CHARINDEX(substr, str)
17004 Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
17005 } else if matches!(target, DialectType::BigQuery) {
17006 // Keep as INSTR
17007 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
17008 } else {
17009 // Default: keep as INSTR
17010 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
17011 }
17012 }
17013
// BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
// BigQuery puts the expression first and the bare unit second; most other
// dialects expect the unit first, as a string literal.
"DATE_TRUNC" if args.len() == 2 => {
    let expr = args.remove(0);
    let unit_expr = args.remove(0);
    // get_unit_str presumably renders the unit expression as a plain string
    // (e.g. "MONTH") — TODO confirm against its definition.
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB | DialectType::Snowflake | DialectType::PostgreSQL
        | DialectType::Presto | DialectType::Trino
        | DialectType::Databricks | DialectType::Spark
        | DialectType::Redshift | DialectType::ClickHouse | DialectType::TSQL => {
            // Standard: DATE_TRUNC('UNIT', expr)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::Literal(Literal::String(unit_str)), expr],
            ))))
        }
        _ => {
            // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![expr, unit_expr],
            ))))
        }
    }
}
17040
// TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
// Handles both the 2-arg and 3-arg (with timezone) BigQuery forms.
"TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
    // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
    let ts = args.remove(0);
    let unit_expr = args.remove(0);
    let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
            // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
            // Without timezone for MINUTE+ granularity: just DATE_TRUNC
            // "Coarse" units are day-or-larger; those are the ones where the
            // timezone shifts which bucket a timestamp falls into.
            let is_coarse = matches!(unit_str.as_str(), "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR");
            // For DATETIME_TRUNC, cast string args to TIMESTAMP
            let cast_ts = if name == "DATETIME_TRUNC" {
                match ts {
                    Expression::Literal(Literal::String(ref _s)) => {
                        // String literal: wrap in an explicit CAST(... AS TIMESTAMP).
                        Expression::Cast(Box::new(Cast {
                            this: ts,
                            to: DataType::Timestamp { precision: None, timezone: false },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    }
                    // Non-literals go through the shared helper.
                    _ => Self::maybe_cast_ts_to_tz(ts, &name),
                }
            } else {
                Self::maybe_cast_ts_to_tz(ts, &name)
            };

            if let Some(tz_arg) = tz {
                if is_coarse {
                    // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_ts,
                        zone: tz_arg.clone(),
                    }));
                    let date_trunc = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                    )));
                    Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: date_trunc,
                        zone: tz_arg,
                    })))
                } else {
                    // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                    ))))
                }
            } else {
                // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                ))))
            }
        }
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_TRUNC('UNIT', ts)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), ts],
            ))))
        }
        _ => {
            // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
            // NOTE(review): this renames DATETIME_TRUNC to TIMESTAMP_TRUNC
            // and puts the unit first, which differs from BigQuery's native
            // TIMESTAMP_TRUNC(ts, unit[, tz]) argument order — confirm the
            // BigQuery generator compensates for this.
            let unit = Expression::Literal(Literal::String(unit_str));
            let mut date_trunc_args = vec![unit, ts];
            if let Some(tz_arg) = tz {
                date_trunc_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_TRUNC".to_string(), date_trunc_args,
            ))))
        }
    }
}
17120
// TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
// Three shapes: 3-arg constructor from parts, 1-arg conversion, and the
// 2-arg (value, timezone) form.
"TIME" => {
    if args.len() == 3 {
        // TIME(h, m, s) constructor
        match target {
            DialectType::TSQL => {
                // TIMEFROMPARTS(h, m, s, 0, 0) — pad with zero fractional
                // seconds and zero precision to reach the 5-arg signature.
                args.push(Expression::number(0));
                args.push(Expression::number(0));
                Ok(Expression::Function(Box::new(Function::new("TIMEFROMPARTS".to_string(), args))))
            }
            DialectType::MySQL => {
                Ok(Expression::Function(Box::new(Function::new("MAKETIME".to_string(), args))))
            }
            DialectType::PostgreSQL => {
                Ok(Expression::Function(Box::new(Function::new("MAKE_TIME".to_string(), args))))
            }
            // Default: keep the TIME(h, m, s) call as-is.
            _ => Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
        }
    } else if args.len() == 1 {
        let arg = args.remove(0);
        if matches!(target, DialectType::Spark) {
            // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // Most targets: CAST(x AS TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Time { precision: None, timezone: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else if args.len() == 2 {
        // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
        let expr = args.remove(0);
        let tz = args.remove(0);
        let cast_tstz = Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Timestamp { timezone: true, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
            this: cast_tstz,
            zone: tz,
        }));
        Ok(Expression::Cast(Box::new(Cast {
            this: at_tz,
            to: DataType::Time { precision: None, timezone: false },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else {
        // Any other arity: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
    }
}
17191
// DATETIME('string') -> CAST('string' AS TIMESTAMP)
// DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
// DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
// DATETIME(y, m, d, h, min, s) -> target-specific
"DATETIME" => {
    // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
    if matches!(target, DialectType::BigQuery) {
        if args.len() == 2 {
            let has_time_literal = matches!(&args[1], Expression::Literal(Literal::Time(_)));
            if has_time_literal {
                let first = args.remove(0);
                let second = args.remove(0);
                // Re-wrap TIME 'x' as CAST('x' AS TIME) so the generated SQL
                // does not contain a bare TIME literal.
                let time_as_cast = match second {
                    Expression::Literal(Literal::Time(s)) => Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(s)),
                        to: DataType::Time { precision: None, timezone: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    other => other,
                };
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(), vec![first, time_as_cast],
                ))));
            }
        }
        // BQ->BQ with any other shape: pass through unchanged.
        return Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))));
    }

    if args.len() == 1 {
        // DATETIME(x) -> CAST(x AS TIMESTAMP)
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        let first = args.remove(0);
        let second = args.remove(0);
        // Check if second arg is a TIME literal — that disambiguates the
        // (date, time) form from the (value, timezone) form.
        let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
        if is_time_literal {
            // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
            let time_as_string = match second {
                Expression::Literal(Literal::Time(s)) => Expression::Literal(Literal::String(s)),
                other => other,
            };
            let cast_time = Expression::Cast(Box::new(Cast {
                this: time_as_string,
                to: DataType::Time { precision: None, timezone: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // date + time, then pin the sum to TIMESTAMP.
            let add_expr = Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
            Ok(Expression::Cast(Box::new(Cast {
                this: add_expr,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // DATETIME('string', 'timezone')
            let cast_tstz = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Timestamp { timezone: true, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_tstz,
                zone: second,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else if args.len() >= 3 {
        // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
        // For other targets, use MAKE_TIMESTAMP or similar
        // NOTE(review): only Snowflake is actually handled here; every other
        // target keeps DATETIME(...) unchanged — confirm that is intended.
        if matches!(target, DialectType::Snowflake) {
            Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
        } else {
            Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
        }
    } else {
        // Zero args: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
    }
}
17305
// TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
// TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
"TIMESTAMP" => {
    if args.len() == 1 {
        // NOTE(review): this timezone-aware cast is emitted for every
        // target, not only Presto as the header comment suggests — confirm
        // the other generators render it correctly.
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: true, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        // Two-arg form: value plus timezone.
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_ts = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        if matches!(target, DialectType::Snowflake) {
            // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(), vec![tz, cast_ts],
            ))))
        } else {
            // Everything else: CAST(x AS TIMESTAMP) AT TIME ZONE tz
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_ts,
                zone: tz,
            })))
        }
    } else {
        // Any other arity: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), args))))
    }
}
17345
// STRING(x) -> CAST(x AS VARCHAR/TEXT)
// STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
"STRING" => {
    if args.len() == 1 {
        // Plain string conversion; DuckDB uses TEXT, everyone else VARCHAR.
        let arg = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar { length: None, parenthesized_length: false },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        // Two-arg form: timestamp value plus target timezone.
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar { length: None, parenthesized_length: false },
        };
        if matches!(target, DialectType::Snowflake) {
            // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
            let convert_tz = Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![Expression::Literal(Literal::String("UTC".to_string())), tz, arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: convert_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
            // The double AT TIME ZONE first anchors the naive timestamp to
            // UTC, then converts it into the requested zone.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let at_utc = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_ts,
                zone: Expression::Literal(Literal::String("UTC".to_string())),
            }));
            let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: at_utc,
                zone: tz,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else {
        // Any other arity: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new("STRING".to_string(), args))))
    }
}
17415
// UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
// UNIX_SECONDS(ts): seconds between the Unix epoch and ts.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // The outer cast pins the result to an integer type.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            let epoch = Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![cast_ts])));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01 00:00:00+00".to_string())),
                to: DataType::Timestamp { timezone: true, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
                this: Box::new(epoch),
                expression: Box::new(ts),
                unit: Some("SECONDS".to_string()),
            })))
        }
        // Default: pass the call through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_SECONDS".to_string(), vec![ts]))))
    }
}
17452
17453 "UNIX_MILLIS" if args.len() == 1 => {
17454 let ts = args.remove(0);
17455 match target {
17456 DialectType::DuckDB => {
17457 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
17458 let cast_ts = Self::ensure_cast_timestamptz(ts);
17459 Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![cast_ts]))))
17460 }
17461 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MILLIS".to_string(), vec![ts]))))
17462 }
17463 }
17464
17465 "UNIX_MICROS" if args.len() == 1 => {
17466 let ts = args.remove(0);
17467 match target {
17468 DialectType::DuckDB => {
17469 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
17470 let cast_ts = Self::ensure_cast_timestamptz(ts);
17471 Ok(Expression::Function(Box::new(Function::new("EPOCH_US".to_string(), vec![cast_ts]))))
17472 }
17473 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MICROS".to_string(), vec![ts]))))
17474 }
17475 }
17476
17477 // ARRAY_CONCAT -> target-specific
17478 "ARRAY_CONCAT" => {
17479 match target {
17480 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17481 // CONCAT(arr1, arr2, ...)
17482 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17483 }
17484 DialectType::Presto | DialectType::Trino => {
17485 // CONCAT(arr1, arr2, ...)
17486 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17487 }
17488 DialectType::Snowflake => {
17489 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17490 if args.len() == 1 {
17491 // ARRAY_CAT requires 2 args, add empty array as []
17492 let empty_arr = Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
17493 expressions: vec![],
17494 bracket_notation: true,
17495 use_list_keyword: false,
17496 }));
17497 let mut new_args = args;
17498 new_args.push(empty_arr);
17499 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), new_args))))
17500 } else if args.is_empty() {
17501 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17502 } else {
17503 let mut it = args.into_iter().rev();
17504 let mut result = it.next().unwrap();
17505 for arr in it {
17506 result = Expression::Function(Box::new(Function::new(
17507 "ARRAY_CAT".to_string(), vec![arr, result],
17508 )));
17509 }
17510 Ok(result)
17511 }
17512 }
17513 DialectType::PostgreSQL => {
17514 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17515 if args.len() <= 1 {
17516 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17517 } else {
17518 let mut it = args.into_iter().rev();
17519 let mut result = it.next().unwrap();
17520 for arr in it {
17521 result = Expression::Function(Box::new(Function::new(
17522 "ARRAY_CAT".to_string(), vec![arr, result],
17523 )));
17524 }
17525 Ok(result)
17526 }
17527 }
17528 DialectType::Redshift => {
17529 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17530 if args.len() <= 2 {
17531 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17532 } else {
17533 let mut it = args.into_iter().rev();
17534 let mut result = it.next().unwrap();
17535 for arr in it {
17536 result = Expression::Function(Box::new(Function::new(
17537 "ARRAY_CONCAT".to_string(), vec![arr, result],
17538 )));
17539 }
17540 Ok(result)
17541 }
17542 }
17543 DialectType::DuckDB => {
17544 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17545 if args.len() <= 2 {
17546 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17547 } else {
17548 let mut it = args.into_iter().rev();
17549 let mut result = it.next().unwrap();
17550 for arr in it {
17551 result = Expression::Function(Box::new(Function::new(
17552 "ARRAY_CONCAT".to_string(), vec![arr, result],
17553 )));
17554 }
17555 Ok(result)
17556 }
17557 }
17558 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17559 }
17560 }
17561
17562 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
17563 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
17564 let arg = args.remove(0);
17565 match target {
17566 DialectType::Snowflake => {
17567 let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
17568 this: arg,
17569 distinct: false,
17570 filter: None,
17571 order_by: vec![],
17572 name: None,
17573 ignore_nulls: None,
17574 having_max: None,
17575 limit: None,
17576 }));
17577 Ok(Expression::Function(Box::new(Function::new(
17578 "ARRAY_FLATTEN".to_string(), vec![array_agg],
17579 ))))
17580 }
17581 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT_AGG".to_string(), vec![arg]))))
17582 }
17583 }
17584
17585 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
17586 "MD5" if args.len() == 1 => {
17587 let arg = args.remove(0);
17588 match target {
17589 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17590 // UNHEX(MD5(x))
17591 let md5 = Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg])));
17592 Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![md5]))))
17593 }
17594 DialectType::Snowflake => {
17595 // MD5_BINARY(x)
17596 Ok(Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), vec![arg]))))
17597 }
17598 _ => Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg]))))
17599 }
17600 }
17601
17602 "SHA1" if args.len() == 1 => {
17603 let arg = args.remove(0);
17604 match target {
17605 DialectType::DuckDB => {
17606 // UNHEX(SHA1(x))
17607 let sha1 = Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg])));
17608 Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha1]))))
17609 }
17610 _ => Ok(Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg]))))
17611 }
17612 }
17613
17614 "SHA256" if args.len() == 1 => {
17615 let arg = args.remove(0);
17616 match target {
17617 DialectType::DuckDB => {
17618 // UNHEX(SHA256(x))
17619 let sha = Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg])));
17620 Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha]))))
17621 }
17622 DialectType::Snowflake => {
17623 // SHA2_BINARY(x, 256)
17624 Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(256)]))))
17625 }
17626 DialectType::Redshift | DialectType::Spark => {
17627 // SHA2(x, 256)
17628 Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(256)]))))
17629 }
17630 _ => Ok(Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg]))))
17631 }
17632 }
17633
17634 "SHA512" if args.len() == 1 => {
17635 let arg = args.remove(0);
17636 match target {
17637 DialectType::Snowflake => {
17638 // SHA2_BINARY(x, 512)
17639 Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(512)]))))
17640 }
17641 DialectType::Redshift | DialectType::Spark => {
17642 // SHA2(x, 512)
17643 Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(512)]))))
17644 }
17645 _ => Ok(Expression::Function(Box::new(Function::new("SHA512".to_string(), vec![arg]))))
17646 }
17647 }
17648
// REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
// Dialects disagree on the default group index (0 = whole match,
// 1 = first capture group), so the explicit index is added or dropped
// depending on whether the pattern appears to contain capture groups.
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Check if pattern contains capturing groups (parentheses)
    // NOTE(review): this heuristic also fires on non-capturing groups
    // `(?:...)` and escaped parens `\(` — confirm that is acceptable here.
    let has_groups = match &pattern {
        Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB always gets an explicit group index: 1 when the pattern
            // captures, 0 (whole match) otherwise.
            let group = if has_groups { Expression::number(1) } else { Expression::number(0) };
            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, group]))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            } else {
                // No groups: force group 0 so the whole match is returned.
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(0)]))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino default to the whole match; pass group 1 explicitly
            // only when the pattern captures.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(1)]))))
            } else {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // (position, occurrence, regex parameters, group number)
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![
                    str_expr, pattern, Expression::number(1), Expression::number(1),
                    Expression::Literal(Literal::String("c".to_string())), Expression::number(1),
                ]))))
            } else {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            }
        }
        _ => Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
    }
}
17694
17695 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
17696 "MOD" if args.len() == 2 => {
17697 match target {
17698 DialectType::PostgreSQL | DialectType::DuckDB => {
17699 let x = args.remove(0);
17700 let y = args.remove(0);
17701 Ok(Expression::Mod(Box::new(crate::expressions::BinaryOp::new(x, y))))
17702 }
17703 _ => Ok(Expression::Function(Box::new(Function::new("MOD".to_string(), args))))
17704 }
17705 }
17706
17707 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
17708 "CONCAT" if args.len() > 2 => {
17709 match target {
17710 DialectType::DuckDB => {
17711 let mut it = args.into_iter();
17712 let mut result = it.next().unwrap();
17713 for arg in it {
17714 result = Expression::DPipe(Box::new(crate::expressions::DPipe { this: Box::new(result), expression: Box::new(arg), safe: None }));
17715 }
17716 Ok(result)
17717 }
17718 _ => Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17719 }
17720 }
17721
17722 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17723 "GENERATE_DATE_ARRAY" => {
17724 if matches!(target, DialectType::BigQuery) {
17725 // BQ->BQ: add default interval if not present
17726 if args.len() == 2 {
17727 let start = args.remove(0);
17728 let end = args.remove(0);
17729 let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
17730 this: Some(Expression::Literal(Literal::String("1".to_string()))),
17731 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
17732 }));
17733 Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
17734 } else {
17735 Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
17736 }
17737 } else if matches!(target, DialectType::DuckDB) {
17738 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
17739 let start = args.get(0).cloned();
17740 let end = args.get(1).cloned();
17741 let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
17742 this: Some(Expression::Literal(Literal::String("1".to_string()))),
17743 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
17744 }))));
17745
17746 // Wrap start/end in CAST(... AS DATE) only for string literals
17747 let maybe_cast_date = |expr: Expression| -> Expression {
17748 if matches!(&expr, Expression::Literal(Literal::String(_))) {
17749 Expression::Cast(Box::new(Cast {
17750 this: expr,
17751 to: DataType::Date,
17752 trailing_comments: vec![],
17753 double_colon_syntax: false,
17754 format: None,
17755 default: None,
17756 }))
17757 } else {
17758 expr
17759 }
17760 };
17761 let cast_start = start.map(maybe_cast_date);
17762 let cast_end = end.map(maybe_cast_date);
17763
17764 let gen_series = Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
17765 start: cast_start.map(Box::new),
17766 end: cast_end.map(Box::new),
17767 step: step.map(Box::new),
17768 is_end_exclusive: None,
17769 }));
17770
17771 // Wrap in CAST(... AS DATE[])
17772 Ok(Expression::Cast(Box::new(Cast {
17773 this: gen_series,
17774 to: DataType::Array { element_type: Box::new(DataType::Date), dimension: None },
17775 trailing_comments: vec![],
17776 double_colon_syntax: false,
17777 format: None,
17778 default: None,
17779 })))
17780 } else if matches!(target, DialectType::Snowflake) {
17781 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
17782 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
17783 if args.len() == 2 {
17784 let start = args.remove(0);
17785 let end = args.remove(0);
17786 let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
17787 this: Some(Expression::Literal(Literal::String("1".to_string()))),
17788 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
17789 }));
17790 Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
17791 } else {
17792 Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
17793 }
17794 } else {
17795 // Convert to GenerateSeries for other targets
17796 let start = args.get(0).cloned();
17797 let end = args.get(1).cloned();
17798 let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
17799 this: Some(Expression::Literal(Literal::String("1".to_string()))),
17800 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
17801 }))));
17802 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
17803 start: start.map(Box::new),
17804 end: end.map(Box::new),
17805 step: step.map(Box::new),
17806 is_end_exclusive: None,
17807 })))
17808 }
17809 }
17810
17811 // PARSE_DATE(format, str) -> target-specific
17812 "PARSE_DATE" if args.len() == 2 => {
17813 let format = args.remove(0);
17814 let str_expr = args.remove(0);
17815 match target {
17816 DialectType::DuckDB => {
17817 // CAST(STRPTIME(str, duck_format) AS DATE)
17818 let duck_format = Self::bq_format_to_duckdb(&format);
17819 let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
17820 Ok(Expression::Cast(Box::new(Cast {
17821 this: strptime,
17822 to: DataType::Date,
17823 trailing_comments: vec![],
17824 double_colon_syntax: false,
17825 format: None,
17826 default: None,
17827 })))
17828 }
17829 DialectType::Snowflake => {
17830 // _POLYGLOT_DATE(str, snowflake_format)
17831 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
17832 let sf_format = Self::bq_format_to_snowflake(&format);
17833 Ok(Expression::Function(Box::new(Function::new("_POLYGLOT_DATE".to_string(), vec![str_expr, sf_format]))))
17834 }
17835 _ => Ok(Expression::Function(Box::new(Function::new("PARSE_DATE".to_string(), vec![format, str_expr]))))
17836 }
17837 }
17838
// PARSE_TIMESTAMP(format, str) -> target-specific
// Accepts an optional third time-zone argument (BigQuery signature).
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    // Optional time-zone argument, when the caller supplied one.
    let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
    match target {
        DialectType::DuckDB => {
            // STRPTIME(str, duck_format)
            // NOTE(review): the optional tz argument is silently dropped on
            // this path — confirm whether the DuckDB output should honor it.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
            Ok(strptime)
        }
        _ => {
            // Other targets: pass through unchanged, re-appending tz when present.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz { result_args.push(tz_arg); }
            Ok(Expression::Function(Box::new(Function::new("PARSE_TIMESTAMP".to_string(), result_args))))
        }
    }
}
17857
17858 // FORMAT_DATE(format, date) -> target-specific
17859 "FORMAT_DATE" if args.len() == 2 => {
17860 let format = args.remove(0);
17861 let date_expr = args.remove(0);
17862 match target {
17863 DialectType::DuckDB => {
17864 // STRFTIME(CAST(date AS DATE), format)
17865 let cast_date = Expression::Cast(Box::new(Cast {
17866 this: date_expr,
17867 to: DataType::Date,
17868 trailing_comments: vec![],
17869 double_colon_syntax: false,
17870 format: None,
17871 default: None,
17872 }));
17873 Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_date, format]))))
17874 }
17875 _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATE".to_string(), vec![format, date_expr]))))
17876 }
17877 }
17878
// FORMAT_DATETIME(format, datetime) -> target-specific
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals:
        // a DATETIME '...' literal is re-emitted as CAST('...' AS DATETIME).
        let norm_dt = match dt_expr {
            Expression::Literal(Literal::Timestamp(s)) => {
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom { name: "DATETIME".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            other => other,
        };
        // Early return: the match below only handles non-BigQuery targets.
        return Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![norm_format, norm_dt]))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_dt, duck_format]))))
        }
        // All other targets keep FORMAT_DATETIME with the original argument order.
        _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![format, dt_expr]))))
    }
}
17914
17915 // FORMAT_TIMESTAMP(format, ts) -> target-specific
17916 "FORMAT_TIMESTAMP" if args.len() == 2 => {
17917 let format = args.remove(0);
17918 let ts_expr = args.remove(0);
17919 match target {
17920 DialectType::DuckDB => {
17921 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
17922 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
17923 let cast_ts = Expression::Cast(Box::new(Cast {
17924 this: cast_tstz,
17925 to: DataType::Timestamp { timezone: false, precision: None },
17926 trailing_comments: vec![],
17927 double_colon_syntax: false,
17928 format: None,
17929 default: None,
17930 }));
17931 Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_ts, format]))))
17932 }
17933 DialectType::Snowflake => {
17934 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
17935 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
17936 let cast_ts = Expression::Cast(Box::new(Cast {
17937 this: cast_tstz,
17938 to: DataType::Timestamp { timezone: false, precision: None },
17939 trailing_comments: vec![],
17940 double_colon_syntax: false,
17941 format: None,
17942 default: None,
17943 }));
17944 let sf_format = Self::bq_format_to_snowflake(&format);
17945 Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![cast_ts, sf_format]))))
17946 }
17947 _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_TIMESTAMP".to_string(), vec![format, ts_expr]))))
17948 }
17949 }
17950
17951 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
17952 "UNIX_DATE" if args.len() == 1 => {
17953 let date = args.remove(0);
17954 match target {
17955 DialectType::DuckDB => {
17956 let epoch = Expression::Cast(Box::new(Cast {
17957 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
17958 to: DataType::Date,
17959 trailing_comments: vec![],
17960 double_colon_syntax: false,
17961 format: None,
17962 default: None,
17963 }));
17964 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
17965 // Need to convert DATE literal to CAST
17966 let norm_date = Self::date_literal_to_cast(date);
17967 Ok(Expression::Function(Box::new(Function::new(
17968 "DATE_DIFF".to_string(), vec![
17969 Expression::Literal(Literal::String("DAY".to_string())),
17970 epoch,
17971 norm_date,
17972 ],
17973 ))))
17974 }
17975 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_DATE".to_string(), vec![date]))))
17976 }
17977 }
17978
// UNIX_SECONDS(ts) -> target-specific
//
// NOTE(review): the three arms below duplicate the patterns AND guards of the
// "UNIX_SECONDS"/"UNIX_MILLIS"/"UNIX_MICROS" arms earlier in this same match.
// Because the earlier arms match first, these later arms appear unreachable
// (the guard prevents the compiler from warning). They also differ in detail:
// here ts_literal_to_cast_tz is used instead of ensure_cast_timestamptz, and
// Snowflake gets a plain TIMESTAMPDIFF function call instead of a
// TimestampDiff AST node. Confirm which version is intended and delete the
// other copy.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            let epoch = Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![norm_ts])));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01 00:00:00+00".to_string())),
                to: DataType::Timestamp { timezone: true, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // Unit passed as a bare identifier argument, not a string literal.
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMPDIFF".to_string(), vec![
                    Expression::Identifier(Identifier::new("SECONDS".to_string())),
                    epoch,
                    ts,
                ],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_SECONDS".to_string(), vec![ts]))))
    }
}

// UNIX_MILLIS(ts) -> target-specific
// NOTE(review): shadowed by the earlier identical "UNIX_MILLIS" arm — see above.
"UNIX_MILLIS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![norm_ts]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MILLIS".to_string(), vec![ts]))))
    }
}

// UNIX_MICROS(ts) -> target-specific
// NOTE(review): shadowed by the earlier identical "UNIX_MICROS" arm — see above.
"UNIX_MICROS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            Ok(Expression::Function(Box::new(Function::new("EPOCH_US".to_string(), vec![norm_ts]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MICROS".to_string(), vec![ts]))))
    }
}
18041
18042 // INSTR(str, substr) -> target-specific
18043 "INSTR" => {
18044 if matches!(target, DialectType::BigQuery) {
18045 // BQ->BQ: keep as INSTR
18046 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
18047 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
18048 // Snowflake: CHARINDEX(substr, str) - swap args
18049 let str_expr = args.remove(0);
18050 let substr = args.remove(0);
18051 Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
18052 } else {
18053 // Keep as INSTR for other targets
18054 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
18055 }
18056 }
18057
// CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
// Handles the function forms (with parentheses and an optional timezone
// argument) and converts them to each target's preferred spelling.
"CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: always output with parens (function form), keep any timezone arg
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    } else if name == "CURRENT_DATE" && args.len() == 1 {
        // CURRENT_DATE('UTC') - has timezone arg
        let tz_arg = args.remove(0);
        match target {
            DialectType::DuckDB => {
                // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                let ct = Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp { precision: None, sysdate: false });
                let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: ct,
                    zone: tz_arg,
                }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: at_tz,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            DialectType::Snowflake => {
                // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                let ct = Expression::Function(Box::new(Function::new("CURRENT_TIMESTAMP".to_string(), vec![])));
                let convert = Expression::Function(Box::new(Function::new("CONVERT_TIMEZONE".to_string(), vec![tz_arg, ct])));
                Ok(Expression::Cast(Box::new(Cast {
                    this: convert,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            _ => {
                // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cd,
                    zone: tz_arg,
                })))
            }
        }
    } else if (name == "CURRENT_TIMESTAMP" || name == "CURRENT_TIME" || name == "CURRENT_DATE") && args.is_empty()
        && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
    {
        // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME,
        // so emit the dedicated AST nodes instead of generic function calls.
        if name == "CURRENT_TIMESTAMP" {
            Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: false,
            }))
        } else if name == "CURRENT_DATE" {
            Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
        } else {
            // CURRENT_TIME
            Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                precision: None,
            }))
        }
    } else {
        // All other targets (and CURRENT_DATETIME everywhere): keep as function (with parens)
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    }
}
18127
18128 // JSON_QUERY(json, path) -> target-specific
18129 "JSON_QUERY" if args.len() == 2 => {
18130 match target {
18131 DialectType::DuckDB | DialectType::SQLite => {
18132 // json -> path syntax
18133 let json_expr = args.remove(0);
18134 let path = args.remove(0);
18135 Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
18136 this: json_expr,
18137 path,
18138 returning: None,
18139 arrow_syntax: true,
18140 hash_arrow_syntax: false,
18141 wrapper_option: None,
18142 quotes_option: None,
18143 on_scalar_string: false,
18144 on_error: None,
18145 })))
18146 }
18147 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18148 Ok(Expression::Function(Box::new(Function::new(
18149 "GET_JSON_OBJECT".to_string(), args,
18150 ))))
18151 }
18152 DialectType::PostgreSQL | DialectType::Redshift => {
18153 Ok(Expression::Function(Box::new(Function::new(
18154 "JSON_EXTRACT_PATH".to_string(), args,
18155 ))))
18156 }
18157 _ => Ok(Expression::Function(Box::new(Function::new("JSON_QUERY".to_string(), args))))
18158 }
18159 }
18160
// JSON_VALUE_ARRAY(json, path) -> target-specific
// Extracts a JSON array and coerces its elements to strings.
"JSON_VALUE_ARRAY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // CAST(json -> path AS TEXT[])
            let json_expr = args.remove(0);
            let path = args.remove(0);
            let arrow = Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: json_expr,
                path,
                returning: None,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: None,
                quotes_option: None,
                on_scalar_string: false,
                on_error: None,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: arrow,
                to: DataType::Array { element_type: Box::new(DataType::Text), dimension: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
            let json_expr = args.remove(0);
            let path_expr = args.remove(0);
            // Convert JSON path from $.path to just path
            // (strips a leading '$' then a leading '.', e.g. "$.a.b" -> "a.b").
            let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr {
                let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                Expression::Literal(Literal::String(trimmed.to_string()))
            } else {
                path_expr
            };
            let parse_json = Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), vec![json_expr])));
            let get_path = Expression::Function(Box::new(Function::new("GET_PATH".to_string(), vec![parse_json, sf_path])));
            // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
            let cast_expr = Expression::Cast(Box::new(Cast {
                this: Expression::Identifier(Identifier::new("x")),
                to: DataType::VarChar { length: None, parenthesized_length: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // Lambda parameter `x` feeds the element-wise VARCHAR cast above.
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![Identifier::new("x")],
                body: cast_expr,
                colon: false,
                parameter_types: vec![],
            }));
            Ok(Expression::Function(Box::new(Function::new("TRANSFORM".to_string(), vec![get_path, lambda]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("JSON_VALUE_ARRAY".to_string(), args))))
    }
}
18220
18221 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
18222 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
18223 // This is different from Hive/Spark where 3rd arg is "group_index"
18224 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
18225 match target {
18226 DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18227 if args.len() == 2 {
18228 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
18229 args.push(Expression::number(1));
18230 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
18231 } else if args.len() == 3 {
18232 let val = args.remove(0);
18233 let regex = args.remove(0);
18234 let position = args.remove(0);
18235 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
18236 if is_pos_1 {
18237 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
18238 } else {
18239 let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
18240 let nullif_expr = Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])));
18241 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![nullif_expr, regex, Expression::number(1)]))))
18242 }
18243 } else if args.len() == 4 {
18244 let val = args.remove(0);
18245 let regex = args.remove(0);
18246 let position = args.remove(0);
18247 let occurrence = args.remove(0);
18248 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
18249 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
18250 if is_pos_1 && is_occ_1 {
18251 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
18252 } else {
18253 let subject = if is_pos_1 {
18254 val
18255 } else {
18256 let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
18257 Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])))
18258 };
18259 let extract_all = Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![subject, regex, Expression::number(1)])));
18260 Ok(Expression::Function(Box::new(Function::new("ARRAY_EXTRACT".to_string(), vec![extract_all, occurrence]))))
18261 }
18262 } else {
18263 Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18264 }
18265 }
18266 DialectType::Snowflake => {
18267 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
18268 Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), args))))
18269 }
18270 _ => {
18271 // For other targets (Hive/Spark/BigQuery): pass through as-is
18272 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
18273 Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18274 }
18275 }
18276 }
18277
18278 // BigQuery STRUCT(args) -> target-specific struct expression
18279 "STRUCT" => {
18280 // Convert Function args to Struct fields
18281 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
18282 for (i, arg) in args.into_iter().enumerate() {
18283 match arg {
18284 Expression::Alias(a) => {
18285 // Named field: expr AS name
18286 fields.push((Some(a.alias.name.clone()), a.this));
18287 }
18288 other => {
18289 // Unnamed field: for Spark/Hive, keep as None
18290 // For Snowflake, auto-name as _N
18291 // For DuckDB, use column name for column refs, _N for others
18292 if matches!(target, DialectType::Snowflake) {
18293 fields.push((Some(format!("_{}", i)), other));
18294 } else if matches!(target, DialectType::DuckDB) {
18295 let auto_name = match &other {
18296 Expression::Column(col) => col.name.name.clone(),
18297 _ => format!("_{}", i),
18298 };
18299 fields.push((Some(auto_name), other));
18300 } else {
18301 fields.push((None, other));
18302 }
18303 }
18304 }
18305 }
18306
18307 match target {
18308 DialectType::Snowflake => {
18309 // OBJECT_CONSTRUCT('name', value, ...)
18310 let mut oc_args = Vec::new();
18311 for (name, val) in &fields {
18312 if let Some(n) = name {
18313 oc_args.push(Expression::Literal(Literal::String(n.clone())));
18314 oc_args.push(val.clone());
18315 } else {
18316 oc_args.push(val.clone());
18317 }
18318 }
18319 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), oc_args))))
18320 }
18321 DialectType::DuckDB => {
18322 // {'name': value, ...}
18323 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
18324 }
18325 DialectType::Hive => {
18326 // STRUCT(val1, val2, ...) - strip aliases
18327 let hive_fields: Vec<(Option<String>, Expression)> = fields.into_iter().map(|(_, v)| (None, v)).collect();
18328 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields: hive_fields })))
18329 }
18330 DialectType::Spark | DialectType::Databricks => {
18331 // Use Expression::Struct to bypass Spark target transform auto-naming
18332 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
18333 }
18334 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18335 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
18336 let all_named = !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
18337 let all_types_inferable = all_named && fields.iter().all(|(_, val)| Self::can_infer_presto_type(val));
18338 let row_args: Vec<Expression> = fields.iter().map(|(_, v)| v.clone()).collect();
18339 let row_expr = Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)));
18340 if all_named && all_types_inferable {
18341 // Build ROW type with inferred types
18342 let mut row_type_fields = Vec::new();
18343 for (name, val) in &fields {
18344 if let Some(n) = name {
18345 let type_str = Self::infer_sql_type_for_presto(val);
18346 row_type_fields.push(crate::expressions::StructField::new(
18347 n.clone(),
18348 crate::expressions::DataType::Custom { name: type_str },
18349 ));
18350 }
18351 }
18352 let row_type = crate::expressions::DataType::Struct { fields: row_type_fields, nested: true };
18353 Ok(Expression::Cast(Box::new(Cast {
18354 this: row_expr,
18355 to: row_type,
18356 trailing_comments: Vec::new(),
18357 double_colon_syntax: false,
18358 format: None,
18359 default: None,
18360 })))
18361 } else {
18362 Ok(row_expr)
18363 }
18364 }
18365 _ => {
18366 // Default: keep as STRUCT function with original args
18367 let mut new_args = Vec::new();
18368 for (name, val) in fields {
18369 if let Some(n) = name {
18370 new_args.push(Expression::Alias(Box::new(crate::expressions::Alias::new(
18371 val, Identifier::new(n),
18372 ))));
18373 } else {
18374 new_args.push(val);
18375 }
18376 }
18377 Ok(Expression::Function(Box::new(Function::new("STRUCT".to_string(), new_args))))
18378 }
18379 }
18380 }
18381
18382 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
18383 "ROUND" if args.len() == 3 => {
18384 let x = args.remove(0);
18385 let n = args.remove(0);
18386 let mode = args.remove(0);
18387 // Check if mode is 'ROUND_HALF_EVEN'
18388 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
18389 if is_half_even && matches!(target, DialectType::DuckDB) {
18390 Ok(Expression::Function(Box::new(Function::new("ROUND_EVEN".to_string(), vec![x, n]))))
18391 } else {
18392 // Pass through with all args
18393 Ok(Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![x, n, mode]))))
18394 }
18395 }
18396
18397 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
18398 "MAKE_INTERVAL" => {
18399 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
18400 // The positional args are: year, month
18401 // Named args are: day =>, minute =>, etc.
18402 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
18403 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
18404 // For BigQuery->BigQuery: reorder named args (day before minute)
18405 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
18406 let mut parts: Vec<(String, String)> = Vec::new();
18407 let mut pos_idx = 0;
18408 let pos_units = ["year", "month"];
18409 for arg in &args {
18410 if let Expression::NamedArgument(na) = arg {
18411 // Named arg like minute => 5
18412 let unit = na.name.name.clone();
18413 if let Expression::Literal(Literal::Number(n)) = &na.value {
18414 parts.push((unit, n.clone()));
18415 }
18416 } else if pos_idx < pos_units.len() {
18417 if let Expression::Literal(Literal::Number(n)) = arg {
18418 parts.push((pos_units[pos_idx].to_string(), n.clone()));
18419 }
18420 pos_idx += 1;
18421 }
18422 }
18423 // Don't sort - preserve original argument order
18424 let separator = if matches!(target, DialectType::Snowflake) { ", " } else { " " };
18425 let interval_str = parts.iter()
18426 .map(|(u, v)| format!("{} {}", v, u))
18427 .collect::<Vec<_>>()
18428 .join(separator);
18429 Ok(Expression::Interval(Box::new(crate::expressions::Interval {
18430 this: Some(Expression::Literal(Literal::String(interval_str))),
18431 unit: None,
18432 })))
18433 } else if matches!(target, DialectType::BigQuery) {
18434 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
18435 let mut positional = Vec::new();
18436 let mut named: Vec<(String, Expression, crate::expressions::NamedArgSeparator)> = Vec::new();
18437 let _pos_units = ["year", "month"];
18438 let mut _pos_idx = 0;
18439 for arg in args {
18440 if let Expression::NamedArgument(na) = arg {
18441 named.push((na.name.name.clone(), na.value, na.separator));
18442 } else {
18443 positional.push(arg);
18444 _pos_idx += 1;
18445 }
18446 }
18447 // Sort named args by: day, hour, minute, second
18448 let unit_order = |u: &str| -> usize {
18449 match u.to_lowercase().as_str() {
18450 "day" => 0, "hour" => 1, "minute" => 2, "second" => 3, _ => 4,
18451 }
18452 };
18453 named.sort_by_key(|(u, _, _)| unit_order(u));
18454 let mut result_args = positional;
18455 for (name, value, sep) in named {
18456 result_args.push(Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
18457 name: Identifier::new(&name),
18458 value,
18459 separator: sep,
18460 })));
18461 }
18462 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), result_args))))
18463 } else {
18464 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), args))))
18465 }
18466 }
18467
18468 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
18469 "ARRAY_TO_STRING" if args.len() == 3 => {
18470 let arr = args.remove(0);
18471 let sep = args.remove(0);
18472 let null_text = args.remove(0);
18473 match target {
18474 DialectType::DuckDB => {
18475 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
18476 let _lambda_param = Expression::Identifier(crate::expressions::Identifier::new("x"));
18477 let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
18478 original_name: None,
18479 expressions: vec![
18480 Expression::Identifier(crate::expressions::Identifier::new("x")),
18481 null_text,
18482 ],
18483 }));
18484 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
18485 parameters: vec![crate::expressions::Identifier::new("x")],
18486 body: coalesce,
18487 colon: false,
18488 parameter_types: vec![],
18489 }));
18490 let list_transform = Expression::Function(Box::new(Function::new("LIST_TRANSFORM".to_string(), vec![arr, lambda])));
18491 Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![list_transform, sep]))))
18492 }
18493 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![arr, sep, null_text]))))
18494 }
18495 }
18496
18497 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
18498 "LENGTH" if args.len() == 1 => {
18499 let arg = args.remove(0);
18500 match target {
18501 DialectType::DuckDB => {
18502 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
18503 let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
18504 let blob_cast = Expression::Cast(Box::new(Cast {
18505 this: arg.clone(),
18506 to: DataType::VarBinary { length: None },
18507 trailing_comments: vec![],
18508 double_colon_syntax: false,
18509 format: None,
18510 default: None,
18511 }));
18512 let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
18513 let text_cast = Expression::Cast(Box::new(Cast {
18514 this: arg,
18515 to: DataType::Text,
18516 trailing_comments: vec![],
18517 double_colon_syntax: false,
18518 format: None,
18519 default: None,
18520 }));
18521 let length_text = Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![text_cast])));
18522 Ok(Expression::Case(Box::new(crate::expressions::Case {
18523 operand: Some(typeof_func),
18524 whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
18525 else_: Some(length_text),
18526 })))
18527 }
18528 _ => Ok(Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![arg]))))
18529 }
18530 }
18531
18532 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
18533 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
18534 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
18535 // The args should be [x, fraction] with the null handling stripped
18536 // For DuckDB: QUANTILE_CONT(x, fraction)
18537 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
18538 match target {
18539 DialectType::DuckDB => {
18540 // Strip down to just 2 args, rename to QUANTILE_CONT
18541 let x = args[0].clone();
18542 let frac = args[1].clone();
18543 Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![x, frac]))))
18544 }
18545 _ => Ok(Expression::Function(Box::new(Function::new("PERCENTILE_CONT".to_string(), args))))
18546 }
18547 }
18548
18549 // All others: pass through
18550 _ => Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18551 }
18552 }
18553
18554 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
18555 /// Returns false for column references and other non-literal expressions where the type is unknown.
18556 fn can_infer_presto_type(expr: &Expression) -> bool {
18557 match expr {
18558 Expression::Literal(_) => true,
18559 Expression::Boolean(_) => true,
18560 Expression::Array(_) | Expression::ArrayFunc(_) => true,
18561 Expression::Struct(_) | Expression::StructFunc(_) => true,
18562 Expression::Function(f) => {
18563 let up = f.name.to_uppercase();
18564 up == "STRUCT" || up == "ROW" || up == "CURRENT_DATE" || up == "CURRENT_TIMESTAMP" || up == "NOW"
18565 }
18566 Expression::Cast(_) => true,
18567 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
18568 _ => false,
18569 }
18570 }
18571
18572 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
18573 fn infer_sql_type_for_presto(expr: &Expression) -> String {
18574 use crate::expressions::Literal;
18575 match expr {
18576 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
18577 Expression::Literal(Literal::Number(n)) => {
18578 if n.contains('.') { "DOUBLE".to_string() } else { "INTEGER".to_string() }
18579 }
18580 Expression::Boolean(_) => "BOOLEAN".to_string(),
18581 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
18582 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
18583 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
18584 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
18585 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
18586 Expression::Function(f) => {
18587 let up = f.name.to_uppercase();
18588 if up == "STRUCT" || up == "ROW" { "ROW".to_string() }
18589 else if up == "CURRENT_DATE" { "DATE".to_string() }
18590 else if up == "CURRENT_TIMESTAMP" || up == "NOW" { "TIMESTAMP".to_string() }
18591 else { "VARCHAR".to_string() }
18592 }
18593 Expression::Cast(c) => {
18594 // If already cast, use the target type
18595 Self::data_type_to_presto_string(&c.to)
18596 }
18597 _ => "VARCHAR".to_string(),
18598 }
18599 }
18600
18601 /// Convert a DataType to its Presto/Trino string representation for ROW type
18602 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
18603 use crate::expressions::DataType;
18604 match dt {
18605 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "VARCHAR".to_string(),
18606 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => "INTEGER".to_string(),
18607 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
18608 DataType::Boolean => "BOOLEAN".to_string(),
18609 DataType::Date => "DATE".to_string(),
18610 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
18611 DataType::Struct { fields, .. } => {
18612 let field_strs: Vec<String> = fields.iter().map(|f| {
18613 format!("{} {}", f.name, Self::data_type_to_presto_string(&f.data_type))
18614 }).collect();
18615 format!("ROW({})", field_strs.join(", "))
18616 }
18617 DataType::Array { element_type, .. } => {
18618 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
18619 }
18620 DataType::Custom { name } => {
18621 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
18622 name.clone()
18623 }
18624 _ => "VARCHAR".to_string(),
18625 }
18626 }
18627
18628 /// Convert IntervalUnit to string
18629 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
18630 match unit {
18631 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
18632 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
18633 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
18634 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
18635 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
18636 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
18637 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
18638 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
18639 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
18640 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
18641 }
18642 }
18643
18644 /// Extract unit string from an expression (uppercased)
18645 fn get_unit_str_static(expr: &Expression) -> String {
18646 use crate::expressions::Literal;
18647 match expr {
18648 Expression::Identifier(id) => id.name.to_uppercase(),
18649 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
18650 Expression::Column(col) => col.name.name.to_uppercase(),
18651 Expression::Function(f) => {
18652 let base = f.name.to_uppercase();
18653 if !f.args.is_empty() {
18654 let inner = Self::get_unit_str_static(&f.args[0]);
18655 format!("{}({})", base, inner)
18656 } else {
18657 base
18658 }
18659 }
18660 _ => "DAY".to_string(),
18661 }
18662 }
18663
18664 /// Parse unit string to IntervalUnit
18665 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
18666 match s {
18667 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
18668 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
18669 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
18670 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
18671 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
18672 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
18673 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
18674 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
18675 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
18676 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
18677 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
18678 _ => crate::expressions::IntervalUnit::Day,
18679 }
18680 }
18681
18682 /// Convert expression to simple string for interval building
18683 fn expr_to_string_static(expr: &Expression) -> String {
18684 use crate::expressions::Literal;
18685 match expr {
18686 Expression::Literal(Literal::Number(s)) => s.clone(),
18687 Expression::Literal(Literal::String(s)) => s.clone(),
18688 Expression::Identifier(id) => id.name.clone(),
18689 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
18690 _ => "1".to_string(),
18691 }
18692 }
18693
18694 /// Extract a simple string representation from a literal expression
18695 fn expr_to_string(expr: &Expression) -> String {
18696 use crate::expressions::Literal;
18697 match expr {
18698 Expression::Literal(Literal::Number(s)) => s.clone(),
18699 Expression::Literal(Literal::String(s)) => s.clone(),
18700 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
18701 Expression::Identifier(id) => id.name.clone(),
18702 _ => "1".to_string(),
18703 }
18704 }
18705
18706 /// Quote an interval value expression as a string literal if it's a number (or negated number)
18707 fn quote_interval_val(expr: &Expression) -> Expression {
18708 use crate::expressions::Literal;
18709 match expr {
18710 Expression::Literal(Literal::Number(n)) => {
18711 Expression::Literal(Literal::String(n.clone()))
18712 }
18713 Expression::Literal(Literal::String(_)) => expr.clone(),
18714 Expression::Neg(inner) => {
18715 if let Expression::Literal(Literal::Number(n)) = &inner.this {
18716 Expression::Literal(Literal::String(format!("-{}", n)))
18717 } else {
18718 expr.clone()
18719 }
18720 }
18721 _ => expr.clone(),
18722 }
18723 }
18724
18725 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
18726 fn timestamp_string_has_timezone(ts: &str) -> bool {
18727 let trimmed = ts.trim();
18728 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
18729 if let Some(last_space) = trimmed.rfind(' ') {
18730 let suffix = &trimmed[last_space + 1..];
18731 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
18732 let rest = &suffix[1..];
18733 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
18734 return true;
18735 }
18736 }
18737 }
18738 // Check for named timezone abbreviations
18739 let ts_lower = trimmed.to_lowercase();
18740 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
18741 for abbrev in &tz_abbrevs {
18742 if ts_lower.ends_with(abbrev) {
18743 return true;
18744 }
18745 }
18746 false
18747 }
18748
18749 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
18750 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
18751 use crate::expressions::{Cast, DataType, Literal};
18752 match expr {
18753 Expression::Literal(Literal::Timestamp(s)) => {
18754 let tz = func_name.starts_with("TIMESTAMP");
18755 Expression::Cast(Box::new(Cast {
18756 this: Expression::Literal(Literal::String(s)),
18757 to: if tz {
18758 DataType::Timestamp { timezone: true, precision: None }
18759 } else {
18760 DataType::Timestamp { timezone: false, precision: None }
18761 },
18762 trailing_comments: vec![],
18763 double_colon_syntax: false,
18764 format: None,
18765 default: None,
18766 }))
18767 }
18768 other => other,
18769 }
18770 }
18771
18772 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
18773 fn maybe_cast_ts(expr: Expression) -> Expression {
18774 use crate::expressions::{Cast, DataType, Literal};
18775 match expr {
18776 Expression::Literal(Literal::Timestamp(s)) => {
18777 Expression::Cast(Box::new(Cast {
18778 this: Expression::Literal(Literal::String(s)),
18779 to: DataType::Timestamp { timezone: false, precision: None },
18780 trailing_comments: vec![],
18781 double_colon_syntax: false,
18782 format: None,
18783 default: None,
18784 }))
18785 }
18786 other => other,
18787 }
18788 }
18789
18790 /// Convert DATE 'x' literal to CAST('x' AS DATE)
18791 fn date_literal_to_cast(expr: Expression) -> Expression {
18792 use crate::expressions::{Cast, DataType, Literal};
18793 match expr {
18794 Expression::Literal(Literal::Date(s)) => {
18795 Expression::Cast(Box::new(Cast {
18796 this: Expression::Literal(Literal::String(s)),
18797 to: DataType::Date,
18798 trailing_comments: vec![],
18799 double_colon_syntax: false,
18800 format: None,
18801 default: None,
18802 }))
18803 }
18804 other => other,
18805 }
18806 }
18807
18808 /// Ensure an expression that should be a date is CAST(... AS DATE).
18809 /// Handles both DATE literals and string literals that look like dates.
18810 fn ensure_cast_date(expr: Expression) -> Expression {
18811 use crate::expressions::{Cast, DataType, Literal};
18812 match expr {
18813 Expression::Literal(Literal::Date(s)) => {
18814 Expression::Cast(Box::new(Cast {
18815 this: Expression::Literal(Literal::String(s)),
18816 to: DataType::Date,
18817 trailing_comments: vec![],
18818 double_colon_syntax: false,
18819 format: None,
18820 default: None,
18821 }))
18822 }
18823 Expression::Literal(Literal::String(ref _s)) => {
18824 // String literal that should be a date -> CAST('s' AS DATE)
18825 Expression::Cast(Box::new(Cast {
18826 this: expr,
18827 to: DataType::Date,
18828 trailing_comments: vec![],
18829 double_colon_syntax: false,
18830 format: None,
18831 default: None,
18832 }))
18833 }
18834 // Already a CAST or other expression -> leave as-is
18835 other => other,
18836 }
18837 }
18838
18839 /// Force CAST(expr AS DATE) for any expression (not just literals)
18840 /// Skips if the expression is already a CAST to DATE
18841 fn force_cast_date(expr: Expression) -> Expression {
18842 use crate::expressions::{Cast, DataType};
18843 // If it's already a CAST to DATE, don't double-wrap
18844 if let Expression::Cast(ref c) = expr {
18845 if matches!(c.to, DataType::Date) {
18846 return expr;
18847 }
18848 }
18849 Expression::Cast(Box::new(Cast {
18850 this: expr,
18851 to: DataType::Date,
18852 trailing_comments: vec![],
18853 double_colon_syntax: false,
18854 format: None,
18855 default: None,
18856 }))
18857 }
18858
    /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
    /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
    /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
    // NOTE(review): this sentinel name must never appear in generated SQL —
    // presumably every code path that wraps with it also unwraps it; confirm.
    const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
18863
18864 fn ensure_to_date_preserved(expr: Expression) -> Expression {
18865 use crate::expressions::{Literal, Function};
18866 if matches!(expr, Expression::Literal(Literal::String(_))) {
18867 Expression::Function(Box::new(Function::new(Self::PRESERVED_TO_DATE.to_string(), vec![expr])))
18868 } else {
18869 expr
18870 }
18871 }
18872
18873 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
18874 fn try_cast_date(expr: Expression) -> Expression {
18875 use crate::expressions::{Cast, DataType};
18876 Expression::TryCast(Box::new(Cast {
18877 this: expr,
18878 to: DataType::Date,
18879 trailing_comments: vec![],
18880 double_colon_syntax: false,
18881 format: None,
18882 default: None,
18883 }))
18884 }
18885
18886 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
18887 fn double_cast_timestamp_date(expr: Expression) -> Expression {
18888 use crate::expressions::{Cast, DataType};
18889 let inner = Expression::Cast(Box::new(Cast {
18890 this: expr,
18891 to: DataType::Timestamp { timezone: false, precision: None },
18892 trailing_comments: vec![],
18893 double_colon_syntax: false,
18894 format: None,
18895 default: None,
18896 }));
18897 Expression::Cast(Box::new(Cast {
18898 this: inner,
18899 to: DataType::Date,
18900 trailing_comments: vec![],
18901 double_colon_syntax: false,
18902 format: None,
18903 default: None,
18904 }))
18905 }
18906
18907 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
18908 fn double_cast_datetime_date(expr: Expression) -> Expression {
18909 use crate::expressions::{Cast, DataType};
18910 let inner = Expression::Cast(Box::new(Cast {
18911 this: expr,
18912 to: DataType::Custom { name: "DATETIME".to_string() },
18913 trailing_comments: vec![],
18914 double_colon_syntax: false,
18915 format: None,
18916 default: None,
18917 }));
18918 Expression::Cast(Box::new(Cast {
18919 this: inner,
18920 to: DataType::Date,
18921 trailing_comments: vec![],
18922 double_colon_syntax: false,
18923 format: None,
18924 default: None,
18925 }))
18926 }
18927
18928 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
18929 fn double_cast_datetime2_date(expr: Expression) -> Expression {
18930 use crate::expressions::{Cast, DataType};
18931 let inner = Expression::Cast(Box::new(Cast {
18932 this: expr,
18933 to: DataType::Custom { name: "DATETIME2".to_string() },
18934 trailing_comments: vec![],
18935 double_colon_syntax: false,
18936 format: None,
18937 default: None,
18938 }));
18939 Expression::Cast(Box::new(Cast {
18940 this: inner,
18941 to: DataType::Date,
18942 trailing_comments: vec![],
18943 double_colon_syntax: false,
18944 format: None,
18945 default: None,
18946 }))
18947 }
18948
18949 /// Convert Hive/Java-style date format strings to C-style (strftime) format
18950 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
18951 fn hive_format_to_c_format(fmt: &str) -> String {
18952 let mut result = String::new();
18953 let chars: Vec<char> = fmt.chars().collect();
18954 let mut i = 0;
18955 while i < chars.len() {
18956 match chars[i] {
18957 'y' => {
18958 let mut count = 0;
18959 while i < chars.len() && chars[i] == 'y' { count += 1; i += 1; }
18960 if count >= 4 { result.push_str("%Y"); }
18961 else if count == 2 { result.push_str("%y"); }
18962 else { result.push_str("%Y"); }
18963 }
18964 'M' => {
18965 let mut count = 0;
18966 while i < chars.len() && chars[i] == 'M' { count += 1; i += 1; }
18967 if count >= 3 { result.push_str("%b"); }
18968 else if count == 2 { result.push_str("%m"); }
18969 else { result.push_str("%m"); }
18970 }
18971 'd' => {
18972 let mut _count = 0;
18973 while i < chars.len() && chars[i] == 'd' { _count += 1; i += 1; }
18974 result.push_str("%d");
18975 }
18976 'H' => {
18977 let mut _count = 0;
18978 while i < chars.len() && chars[i] == 'H' { _count += 1; i += 1; }
18979 result.push_str("%H");
18980 }
18981 'h' => {
18982 let mut _count = 0;
18983 while i < chars.len() && chars[i] == 'h' { _count += 1; i += 1; }
18984 result.push_str("%I");
18985 }
18986 'm' => {
18987 let mut _count = 0;
18988 while i < chars.len() && chars[i] == 'm' { _count += 1; i += 1; }
18989 result.push_str("%M");
18990 }
18991 's' => {
18992 let mut _count = 0;
18993 while i < chars.len() && chars[i] == 's' { _count += 1; i += 1; }
18994 result.push_str("%S");
18995 }
18996 'S' => {
18997 // Fractional seconds - skip
18998 while i < chars.len() && chars[i] == 'S' { i += 1; }
18999 result.push_str("%f");
19000 }
19001 'a' => {
19002 // AM/PM
19003 while i < chars.len() && chars[i] == 'a' { i += 1; }
19004 result.push_str("%p");
19005 }
19006 'E' => {
19007 let mut count = 0;
19008 while i < chars.len() && chars[i] == 'E' { count += 1; i += 1; }
19009 if count >= 4 { result.push_str("%A"); }
19010 else { result.push_str("%a"); }
19011 }
19012 '\'' => {
19013 // Quoted literal text - pass through the quotes and content
19014 result.push('\'');
19015 i += 1;
19016 while i < chars.len() && chars[i] != '\'' {
19017 result.push(chars[i]);
19018 i += 1;
19019 }
19020 if i < chars.len() { result.push('\''); i += 1; }
19021 }
19022 c => {
19023 result.push(c);
19024 i += 1;
19025 }
19026 }
19027 }
19028 result
19029 }
19030
19031 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
19032 fn hive_format_to_presto_format(fmt: &str) -> String {
19033 let c_fmt = Self::hive_format_to_c_format(fmt);
19034 // Presto uses %T for HH:MM:SS
19035 c_fmt.replace("%H:%M:%S", "%T")
19036 }
19037
19038 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
19039 fn ensure_cast_timestamp(expr: Expression) -> Expression {
19040 use crate::expressions::{Cast, DataType, Literal};
19041 match expr {
19042 Expression::Literal(Literal::Timestamp(s)) => {
19043 Expression::Cast(Box::new(Cast {
19044 this: Expression::Literal(Literal::String(s)),
19045 to: DataType::Timestamp { timezone: false, precision: None },
19046 trailing_comments: vec![],
19047 double_colon_syntax: false,
19048 format: None,
19049 default: None,
19050 }))
19051 }
19052 Expression::Literal(Literal::String(ref _s)) => {
19053 Expression::Cast(Box::new(Cast {
19054 this: expr,
19055 to: DataType::Timestamp { timezone: false, precision: None },
19056 trailing_comments: vec![],
19057 double_colon_syntax: false,
19058 format: None,
19059 default: None,
19060 }))
19061 }
19062 Expression::Literal(Literal::Datetime(s)) => {
19063 Expression::Cast(Box::new(Cast {
19064 this: Expression::Literal(Literal::String(s)),
19065 to: DataType::Timestamp { timezone: false, precision: None },
19066 trailing_comments: vec![],
19067 double_colon_syntax: false,
19068 format: None,
19069 default: None,
19070 }))
19071 }
19072 other => other,
19073 }
19074 }
19075
19076 /// Force CAST to TIMESTAMP for any expression (not just literals)
19077 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
19078 fn force_cast_timestamp(expr: Expression) -> Expression {
19079 use crate::expressions::{Cast, DataType};
19080 // Don't double-wrap if already a CAST to TIMESTAMP
19081 if let Expression::Cast(ref c) = expr {
19082 if matches!(c.to, DataType::Timestamp { .. }) {
19083 return expr;
19084 }
19085 }
19086 Expression::Cast(Box::new(Cast {
19087 this: expr,
19088 to: DataType::Timestamp { timezone: false, precision: None },
19089 trailing_comments: vec![],
19090 double_colon_syntax: false,
19091 format: None,
19092 default: None,
19093 }))
19094 }
19095
19096 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
19097 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
19098 use crate::expressions::{Cast, DataType, Literal};
19099 match expr {
19100 Expression::Literal(Literal::Timestamp(s)) => {
19101 Expression::Cast(Box::new(Cast {
19102 this: Expression::Literal(Literal::String(s)),
19103 to: DataType::Timestamp { timezone: true, precision: None },
19104 trailing_comments: vec![],
19105 double_colon_syntax: false,
19106 format: None,
19107 default: None,
19108 }))
19109 }
19110 Expression::Literal(Literal::String(ref _s)) => {
19111 Expression::Cast(Box::new(Cast {
19112 this: expr,
19113 to: DataType::Timestamp { timezone: true, precision: None },
19114 trailing_comments: vec![],
19115 double_colon_syntax: false,
19116 format: None,
19117 default: None,
19118 }))
19119 }
19120 Expression::Literal(Literal::Datetime(s)) => {
19121 Expression::Cast(Box::new(Cast {
19122 this: Expression::Literal(Literal::String(s)),
19123 to: DataType::Timestamp { timezone: true, precision: None },
19124 trailing_comments: vec![],
19125 double_colon_syntax: false,
19126 format: None,
19127 default: None,
19128 }))
19129 }
19130 other => other,
19131 }
19132 }
19133
19134 /// Ensure expression is CAST to DATETIME (for BigQuery)
19135 fn ensure_cast_datetime(expr: Expression) -> Expression {
19136 use crate::expressions::{Cast, DataType, Literal};
19137 match expr {
19138 Expression::Literal(Literal::String(ref _s)) => {
19139 Expression::Cast(Box::new(Cast {
19140 this: expr,
19141 to: DataType::Custom { name: "DATETIME".to_string() },
19142 trailing_comments: vec![],
19143 double_colon_syntax: false,
19144 format: None,
19145 default: None,
19146 }))
19147 }
19148 other => other,
19149 }
19150 }
19151
19152 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
19153 fn force_cast_datetime(expr: Expression) -> Expression {
19154 use crate::expressions::{Cast, DataType};
19155 if let Expression::Cast(ref c) = expr {
19156 if let DataType::Custom { ref name } = c.to {
19157 if name.eq_ignore_ascii_case("DATETIME") {
19158 return expr;
19159 }
19160 }
19161 }
19162 Expression::Cast(Box::new(Cast {
19163 this: expr,
19164 to: DataType::Custom { name: "DATETIME".to_string() },
19165 trailing_comments: vec![],
19166 double_colon_syntax: false,
19167 format: None,
19168 default: None,
19169 }))
19170 }
19171
19172 /// Ensure expression is CAST to DATETIME2 (for TSQL)
19173 fn ensure_cast_datetime2(expr: Expression) -> Expression {
19174 use crate::expressions::{Cast, DataType, Literal};
19175 match expr {
19176 Expression::Literal(Literal::String(ref _s)) => {
19177 Expression::Cast(Box::new(Cast {
19178 this: expr,
19179 to: DataType::Custom { name: "DATETIME2".to_string() },
19180 trailing_comments: vec![],
19181 double_colon_syntax: false,
19182 format: None,
19183 default: None,
19184 }))
19185 }
19186 other => other,
19187 }
19188 }
19189
19190 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
19191 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
19192 use crate::expressions::{Cast, DataType, Literal};
19193 match expr {
19194 Expression::Literal(Literal::Timestamp(s)) => {
19195 Expression::Cast(Box::new(Cast {
19196 this: Expression::Literal(Literal::String(s)),
19197 to: DataType::Timestamp { timezone: true, precision: None },
19198 trailing_comments: vec![],
19199 double_colon_syntax: false,
19200 format: None,
19201 default: None,
19202 }))
19203 }
19204 other => other,
19205 }
19206 }
19207
19208 /// Convert BigQuery format string to Snowflake format string
19209 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
19210 use crate::expressions::Literal;
19211 if let Expression::Literal(Literal::String(s)) = format_expr {
19212 let sf = s
19213 .replace("%Y", "yyyy")
19214 .replace("%m", "mm")
19215 .replace("%d", "DD")
19216 .replace("%H", "HH24")
19217 .replace("%M", "MI")
19218 .replace("%S", "SS")
19219 .replace("%b", "mon")
19220 .replace("%B", "Month")
19221 .replace("%e", "FMDD");
19222 Expression::Literal(Literal::String(sf))
19223 } else {
19224 format_expr.clone()
19225 }
19226 }
19227
19228 /// Convert BigQuery format string to DuckDB format string
19229 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
19230 use crate::expressions::Literal;
19231 if let Expression::Literal(Literal::String(s)) = format_expr {
19232 let duck = s
19233 .replace("%T", "%H:%M:%S")
19234 .replace("%F", "%Y-%m-%d")
19235 .replace("%D", "%m/%d/%y")
19236 .replace("%x", "%m/%d/%y")
19237 .replace("%c", "%a %b %-d %H:%M:%S %Y")
19238 .replace("%e", "%-d")
19239 .replace("%E6S", "%S.%f");
19240 Expression::Literal(Literal::String(duck))
19241 } else {
19242 format_expr.clone()
19243 }
19244 }
19245
19246 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
19247 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
19248 use crate::expressions::Literal;
19249 if let Expression::Literal(Literal::String(s)) = format_expr {
19250 // Replace format elements from longest to shortest to avoid partial matches
19251 let result = s
19252 .replace("YYYYMMDD", "%Y%m%d")
19253 .replace("YYYY", "%Y")
19254 .replace("YY", "%y")
19255 .replace("MONTH", "%B")
19256 .replace("MON", "%b")
19257 .replace("MM", "%m")
19258 .replace("DD", "%d")
19259 .replace("HH24", "%H")
19260 .replace("HH12", "%I")
19261 .replace("HH", "%I")
19262 .replace("MI", "%M")
19263 .replace("SSTZH", "%S%z")
19264 .replace("SS", "%S")
19265 .replace("TZH", "%z");
19266 Expression::Literal(Literal::String(result))
19267 } else {
19268 format_expr.clone()
19269 }
19270 }
19271
19272 /// Normalize BigQuery format strings for BQ->BQ output
19273 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
19274 use crate::expressions::Literal;
19275 if let Expression::Literal(Literal::String(s)) = format_expr {
19276 let norm = s
19277 .replace("%H:%M:%S", "%T")
19278 .replace("%x", "%D");
19279 Expression::Literal(Literal::String(norm))
19280 } else {
19281 format_expr.clone()
19282 }
19283 }
19284}
19285
#[cfg(test)]
mod tests {
    use super::*;

    /// Dialect names (and aliases) must resolve to the right `DialectType`.
    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!("postgres".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("postgresql".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!("bigquery".parse::<DialectType>().unwrap(), DialectType::BigQuery);
    }

    /// A trivial statement should round-trip through transpilation unchanged.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT 1", DialectType::PostgreSQL).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::MySQL).unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    /// Smoke test for Snowflake `:` path access / GET_PATH transpilation.
    ///
    /// NOTE(review): this test previously only printed the results via
    /// `eprintln!` and contained no assertions, so it could never fail.
    /// Minimal structural assertions are added below; the exact expected
    /// output strings should be pinned once the intended behavior is settled.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake.transpile_to(
            "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
        assert_eq!(result_sf_sf.len(), 1, "one statement in, one statement out");
        assert!(!result_sf_sf[0].is_empty(), "transpiled SQL must not be empty");

        // Step 2: DuckDB target
        let result_sf_dk = snowflake.transpile_to(
            "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
            DialectType::DuckDB,
        ).unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
        assert_eq!(result_sf_dk.len(), 1, "one statement in, one statement out");
        assert!(!result_sf_dk[0].is_empty(), "transpiled SQL must not be empty");

        // Step 3: GET_PATH directly
        let result_gp = snowflake.transpile_to(
            "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
            DialectType::DuckDB,
        ).unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
        assert_eq!(result_gp.len(), 1, "one statement in, one statement out");
        assert!(!result_gp[0].is_empty(), "transpiled SQL must not be empty");
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::Presto).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    /// The angle-bracket ARRAY syntax must survive both the one-shot
    /// `transpile_to` path and the manual parse -> transform -> generate path.
    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(result[0].contains("ARRAY<DATE>"), "transpile_to: Expected ARRAY<DATE>, got: {}", result[0]);

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(output.contains("ARRAY<DATE>"), "identity path: Expected ARRAY<DATE>, got: {}", output);
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a NOT BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect.transpile_to("SELECT * FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert!(result[0].contains("BETWEEN"), "BETWEEN should be preserved in SELECT");
    }

    /// Regression test: nested LTRIM/RTRIM must parse without error.
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}", result.err());
    }

    /// Regression test: COUNT_IF must parse as a function in DuckDB.
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful parse of COUNT_IF(x), got error: {:?}", result.err());
    }

    /// Regression test: TSQL TINYINT in a CAST must transpile without error.
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful transpile, got error: {:?}", result.err());
    }

    /// Postgres `#` (bitwise XOR) must round-trip unchanged.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("x # y", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "x # y");
    }

    /// Postgres ARRAY[...] literals become bracket literals in DuckDB while
    /// the `@>` containment operator is preserved.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

}