// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic;
23mod postgres;
24mod mysql;
25mod bigquery;
26mod snowflake;
27mod duckdb;
28mod tsql;
29mod oracle;
30mod hive;
31mod spark;
32mod sqlite;
33mod presto;
34mod trino;
35mod redshift;
36mod clickhouse;
37mod databricks;
38mod athena;
39mod teradata;
40mod doris;
41mod starrocks;
42mod materialize;
43mod risingwave;
44mod singlestore;
45mod cockroachdb;
46mod tidb;
47mod druid;
48mod solr;
49mod tableau;
50mod dune;
51mod fabric;
52mod drill;
53mod dremio;
54mod exasol;
55mod datafusion;
56
57pub use generic::GenericDialect;
58pub use postgres::PostgresDialect;
59pub use mysql::MySQLDialect;
60pub use bigquery::BigQueryDialect;
61pub use snowflake::SnowflakeDialect;
62pub use duckdb::DuckDBDialect;
63pub use tsql::TSQLDialect;
64pub use oracle::OracleDialect;
65pub use hive::HiveDialect;
66pub use spark::SparkDialect;
67pub use sqlite::SQLiteDialect;
68pub use presto::PrestoDialect;
69pub use trino::TrinoDialect;
70pub use redshift::RedshiftDialect;
71pub use clickhouse::ClickHouseDialect;
72pub use databricks::DatabricksDialect;
73pub use athena::AthenaDialect;
74pub use teradata::TeradataDialect;
75pub use doris::DorisDialect;
76pub use starrocks::StarRocksDialect;
77pub use materialize::MaterializeDialect;
78pub use risingwave::RisingWaveDialect;
79pub use singlestore::SingleStoreDialect;
80pub use cockroachdb::CockroachDBDialect;
81pub use tidb::TiDBDialect;
82pub use druid::DruidDialect;
83pub use solr::SolrDialect;
84pub use tableau::TableauDialect;
85pub use dune::DuneDialect;
86pub use fabric::FabricDialect;
87pub use drill::DrillDialect;
88pub use dremio::DremioDialect;
89pub use exasol::ExasolDialect;
90pub use datafusion::DataFusionDialect;
91
92use crate::error::Result;
93use crate::expressions::{Expression, FunctionBody};
94use crate::generator::{Generator, GeneratorConfig};
95use crate::parser::Parser;
96use crate::tokens::{Tokenizer, TokenizerConfig};
97use serde::{Deserialize, Serialize};
98use std::collections::HashMap;
99use std::sync::{Arc, LazyLock, RwLock};
100
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// Serde uses the lowercase variant name (`rename_all = "lowercase"`), which matches
/// the canonical name produced by the `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
181
182impl Default for DialectType {
183 fn default() -> Self {
184 DialectType::Generic
185 }
186}
187
188impl std::fmt::Display for DialectType {
189 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190 match self {
191 DialectType::Generic => write!(f, "generic"),
192 DialectType::PostgreSQL => write!(f, "postgresql"),
193 DialectType::MySQL => write!(f, "mysql"),
194 DialectType::BigQuery => write!(f, "bigquery"),
195 DialectType::Snowflake => write!(f, "snowflake"),
196 DialectType::DuckDB => write!(f, "duckdb"),
197 DialectType::SQLite => write!(f, "sqlite"),
198 DialectType::Hive => write!(f, "hive"),
199 DialectType::Spark => write!(f, "spark"),
200 DialectType::Trino => write!(f, "trino"),
201 DialectType::Presto => write!(f, "presto"),
202 DialectType::Redshift => write!(f, "redshift"),
203 DialectType::TSQL => write!(f, "tsql"),
204 DialectType::Oracle => write!(f, "oracle"),
205 DialectType::ClickHouse => write!(f, "clickhouse"),
206 DialectType::Databricks => write!(f, "databricks"),
207 DialectType::Athena => write!(f, "athena"),
208 DialectType::Teradata => write!(f, "teradata"),
209 DialectType::Doris => write!(f, "doris"),
210 DialectType::StarRocks => write!(f, "starrocks"),
211 DialectType::Materialize => write!(f, "materialize"),
212 DialectType::RisingWave => write!(f, "risingwave"),
213 DialectType::SingleStore => write!(f, "singlestore"),
214 DialectType::CockroachDB => write!(f, "cockroachdb"),
215 DialectType::TiDB => write!(f, "tidb"),
216 DialectType::Druid => write!(f, "druid"),
217 DialectType::Solr => write!(f, "solr"),
218 DialectType::Tableau => write!(f, "tableau"),
219 DialectType::Dune => write!(f, "dune"),
220 DialectType::Fabric => write!(f, "fabric"),
221 DialectType::Drill => write!(f, "drill"),
222 DialectType::Dremio => write!(f, "dremio"),
223 DialectType::Exasol => write!(f, "exasol"),
224 DialectType::DataFusion => write!(f, "datafusion"),
225 }
226 }
227}
228
229impl std::str::FromStr for DialectType {
230 type Err = crate::error::Error;
231
232 fn from_str(s: &str) -> Result<Self> {
233 match s.to_lowercase().as_str() {
234 "generic" | "" => Ok(DialectType::Generic),
235 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
236 "mysql" => Ok(DialectType::MySQL),
237 "bigquery" => Ok(DialectType::BigQuery),
238 "snowflake" => Ok(DialectType::Snowflake),
239 "duckdb" => Ok(DialectType::DuckDB),
240 "sqlite" => Ok(DialectType::SQLite),
241 "hive" => Ok(DialectType::Hive),
242 "spark" | "spark2" => Ok(DialectType::Spark),
243 "trino" => Ok(DialectType::Trino),
244 "presto" => Ok(DialectType::Presto),
245 "redshift" => Ok(DialectType::Redshift),
246 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
247 "oracle" => Ok(DialectType::Oracle),
248 "clickhouse" => Ok(DialectType::ClickHouse),
249 "databricks" => Ok(DialectType::Databricks),
250 "athena" => Ok(DialectType::Athena),
251 "teradata" => Ok(DialectType::Teradata),
252 "doris" => Ok(DialectType::Doris),
253 "starrocks" => Ok(DialectType::StarRocks),
254 "materialize" => Ok(DialectType::Materialize),
255 "risingwave" => Ok(DialectType::RisingWave),
256 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
257 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
258 "tidb" => Ok(DialectType::TiDB),
259 "druid" => Ok(DialectType::Druid),
260 "solr" => Ok(DialectType::Solr),
261 "tableau" => Ok(DialectType::Tableau),
262 "dune" => Ok(DialectType::Dune),
263 "fabric" => Ok(DialectType::Fabric),
264 "drill" => Ok(DialectType::Drill),
265 "dremio" => Ok(DialectType::Dremio),
266 "exasol" => Ok(DialectType::Exasol),
267 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
268 _ => Err(crate::error::Error::parse(format!("Unknown dialect: {}", s))),
269 }
270 }
271}
272
273/// Trait that each concrete SQL dialect must implement.
274///
275/// `DialectImpl` provides the configuration hooks and per-expression transform logic
276/// that distinguish one dialect from another. Implementors supply:
277///
278/// - A [`DialectType`] identifier.
279/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
280/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
281/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
282/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
283/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
284///
285/// The default implementations are no-ops, so a minimal dialect only needs to provide
286/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
287/// standard SQL.
288pub trait DialectImpl {
289 /// Returns the [`DialectType`] that identifies this dialect.
290 fn dialect_type(&self) -> DialectType;
291
292 /// Returns the tokenizer configuration for this dialect.
293 ///
294 /// Override to customize identifier quoting characters, string escape rules,
295 /// comment styles, and other lexing behavior.
296 fn tokenizer_config(&self) -> TokenizerConfig {
297 TokenizerConfig::default()
298 }
299
300 /// Returns the generator configuration for this dialect.
301 ///
302 /// Override to customize identifier quoting style, function name casing,
303 /// keyword casing, and other SQL generation behavior.
304 fn generator_config(&self) -> GeneratorConfig {
305 GeneratorConfig::default()
306 }
307
308 /// Returns a generator configuration tailored to a specific expression.
309 ///
310 /// Override this for hybrid dialects like Athena that route to different SQL engines
311 /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
312 /// The default delegates to [`generator_config`](DialectImpl::generator_config).
313 fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
314 self.generator_config()
315 }
316
317 /// Transforms a single expression node for this dialect, without recursing into children.
318 ///
319 /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
320 /// expression unchanged if no dialect-specific rewrite is needed. Transformations
321 /// typically include function renaming, operator substitution, and type mapping.
322 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
323 Ok(expr)
324 }
325
326 /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
327 ///
328 /// Override this to apply structural rewrites that must see the entire tree at once,
329 /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
330 /// `explode_projection_to_unnest`. The default is a no-op pass-through.
331 fn preprocess(&self, expr: Expression) -> Result<Expression> {
332 Ok(expr)
333 }
334}
335
336/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
337/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
338///
339/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
340/// and then nested element/field types are recursed into. This ensures that dialect-level
341/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
342fn transform_data_type_recursive<F>(dt: crate::expressions::DataType, transform_fn: &F) -> Result<crate::expressions::DataType>
343where
344 F: Fn(Expression) -> Result<Expression>,
345{
346 use crate::expressions::DataType;
347 // First, transform the outermost type through the expression system
348 let dt_expr = transform_fn(Expression::DataType(dt))?;
349 let dt = match dt_expr {
350 Expression::DataType(d) => d,
351 _ => return Ok(match dt_expr { _ => DataType::Custom { name: "UNKNOWN".to_string() } }),
352 };
353 // Then recurse into nested types
354 match dt {
355 DataType::Array { element_type, dimension } => {
356 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
357 Ok(DataType::Array { element_type: Box::new(inner), dimension })
358 }
359 DataType::List { element_type } => {
360 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
361 Ok(DataType::List { element_type: Box::new(inner) })
362 }
363 DataType::Struct { fields, nested } => {
364 let mut new_fields = Vec::new();
365 for mut field in fields {
366 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
367 new_fields.push(field);
368 }
369 Ok(DataType::Struct { fields: new_fields, nested })
370 }
371 DataType::Map { key_type, value_type } => {
372 let k = transform_data_type_recursive(*key_type, transform_fn)?;
373 let v = transform_data_type_recursive(*value_type, transform_fn)?;
374 Ok(DataType::Map { key_type: Box::new(k), value_type: Box::new(v) })
375 }
376 other => Ok(other),
377 }
378}
379
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// Both engines use C-style `%` directives, but a few specifiers differ (e.g. DuckDB's
/// `%M` minutes vs Presto's `%i`). Multi-character patterns are masked behind `\x01`
/// sentinel placeholders first so the single-character substitutions cannot corrupt them,
/// then expanded into their Presto equivalents.
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Ordered rewrite table: longer patterns are masked before shorter ones fire.
    const STEPS: &[(&str, &str)] = &[
        // Pass 1: protect multi-character patterns with sentinel placeholders.
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
        // Pass 2: convert the individual specifiers that differ.
        ("%M", "%i"),
        ("%S", "%s"),
        // Pass 3: expand the placeholders into their Presto equivalents.
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];
    STEPS
        .iter()
        .fold(fmt.to_string(), |acc, &(from, to)| acc.replace(from, to))
}
404
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery understands a mix of strftime-like directives; most DuckDB specifiers are
/// already valid, so only a handful need rewriting:
///
/// - `%-d` (no-pad day) becomes `%e`, and is handled first so the longer date pattern
///   below cannot partially match around it.
/// - `%Y-%m-%d` collapses to the `%F` shorthand.
/// - `%H:%M:%S` collapses to the `%T` shorthand.
///
/// A combined `%Y-%m-%d %H:%M:%S` input is covered by the last two rules, which together
/// yield `%F %T` (the previously separate replacement for the combined pattern was
/// redundant and has been removed).
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
416
417/// Applies a transform function bottom-up through an entire expression tree.
418///
419/// This is the core tree-rewriting engine used by the dialect system. It performs
420/// a post-order (children-first) traversal: for each node, all children are recursively
421/// transformed before the node itself is passed to `transform_fn`. This bottom-up
422/// strategy means that when `transform_fn` sees a node, its children have already
423/// been rewritten, which simplifies pattern matching on sub-expressions.
424///
425/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
426/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
427/// function calls, CASE expressions, date/time functions, and more.
428///
429/// # Arguments
430///
431/// * `expr` - The root expression to transform (consumed).
432/// * `transform_fn` - A closure that receives each expression node (after its children
433/// have been transformed) and returns a possibly-rewritten expression.
434///
435/// # Errors
436///
437/// Returns an error if `transform_fn` returns an error for any node.
438pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
439where
440 F: Fn(Expression) -> Result<Expression>,
441{
442 use crate::expressions::BinaryOp;
443
444 // Helper macro to transform binary ops with Box<BinaryOp>
445 macro_rules! transform_binary {
446 ($variant:ident, $op:expr) => {{
447 let left = transform_recursive($op.left, transform_fn)?;
448 let right = transform_recursive($op.right, transform_fn)?;
449 Expression::$variant(Box::new(BinaryOp {
450 left,
451 right,
452 left_comments: $op.left_comments,
453 operator_comments: $op.operator_comments,
454 trailing_comments: $op.trailing_comments,
455 }))
456 }};
457 }
458
459 // First recursively transform children, then apply the transform function
460 let expr = match expr {
461 Expression::Select(mut select) => {
462 select.expressions = select
463 .expressions
464 .into_iter()
465 .map(|e| transform_recursive(e, transform_fn))
466 .collect::<Result<Vec<_>>>()?;
467
468 // Transform FROM clause
469 if let Some(mut from) = select.from.take() {
470 from.expressions = from
471 .expressions
472 .into_iter()
473 .map(|e| transform_recursive(e, transform_fn))
474 .collect::<Result<Vec<_>>>()?;
475 select.from = Some(from);
476 }
477
478 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
479 select.joins = select
480 .joins
481 .into_iter()
482 .map(|mut join| {
483 join.this = transform_recursive(join.this, transform_fn)?;
484 if let Some(on) = join.on.take() {
485 join.on = Some(transform_recursive(on, transform_fn)?);
486 }
487 // Wrap join in Expression::Join to allow transform_fn to transform it
488 match transform_fn(Expression::Join(Box::new(join)))? {
489 Expression::Join(j) => Ok(*j),
490 _ => Err(crate::error::Error::parse("Join transformation returned non-join expression")),
491 }
492 })
493 .collect::<Result<Vec<_>>>()?;
494
495 // Transform WHERE clause
496 if let Some(mut where_clause) = select.where_clause.take() {
497 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
498 select.where_clause = Some(where_clause);
499 }
500
501 // Transform GROUP BY
502 if let Some(mut group_by) = select.group_by.take() {
503 group_by.expressions = group_by
504 .expressions
505 .into_iter()
506 .map(|e| transform_recursive(e, transform_fn))
507 .collect::<Result<Vec<_>>>()?;
508 select.group_by = Some(group_by);
509 }
510
511 // Transform HAVING
512 if let Some(mut having) = select.having.take() {
513 having.this = transform_recursive(having.this, transform_fn)?;
514 select.having = Some(having);
515 }
516
517 // Transform WITH (CTEs)
518 if let Some(mut with) = select.with.take() {
519 with.ctes = with.ctes.into_iter().map(|mut cte| {
520 let original = cte.this.clone();
521 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
522 cte
523 }).collect();
524 select.with = Some(with);
525 }
526
527 // Transform ORDER BY
528 if let Some(mut order) = select.order_by.take() {
529 order.expressions = order.expressions.into_iter().map(|o| {
530 let mut o = o;
531 let original = o.this.clone();
532 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
533 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
534 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
535 Ok(Expression::Ordered(transformed)) => *transformed,
536 Ok(_) | Err(_) => o,
537 }
538 }).collect();
539 select.order_by = Some(order);
540 }
541
542 // Transform WINDOW clause order_by
543 if let Some(ref mut windows) = select.windows {
544 for nw in windows.iter_mut() {
545 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by).into_iter().map(|o| {
546 let mut o = o;
547 let original = o.this.clone();
548 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
549 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
550 Ok(Expression::Ordered(transformed)) => *transformed,
551 Ok(_) | Err(_) => o,
552 }
553 }).collect();
554 }
555 }
556
557 // Transform QUALIFY
558 if let Some(mut qual) = select.qualify.take() {
559 qual.this = transform_recursive(qual.this, transform_fn)?;
560 select.qualify = Some(qual);
561 }
562
563 Expression::Select(select)
564 }
565 Expression::Function(mut f) => {
566 f.args = f
567 .args
568 .into_iter()
569 .map(|e| transform_recursive(e, transform_fn))
570 .collect::<Result<Vec<_>>>()?;
571 Expression::Function(f)
572 }
573 Expression::AggregateFunction(mut f) => {
574 f.args = f
575 .args
576 .into_iter()
577 .map(|e| transform_recursive(e, transform_fn))
578 .collect::<Result<Vec<_>>>()?;
579 if let Some(filter) = f.filter {
580 f.filter = Some(transform_recursive(filter, transform_fn)?);
581 }
582 Expression::AggregateFunction(f)
583 }
584 Expression::WindowFunction(mut wf) => {
585 wf.this = transform_recursive(wf.this, transform_fn)?;
586 wf.over.partition_by = wf
587 .over
588 .partition_by
589 .into_iter()
590 .map(|e| transform_recursive(e, transform_fn))
591 .collect::<Result<Vec<_>>>()?;
592 // Transform order_by items through Expression::Ordered wrapper
593 wf.over.order_by = wf.over.order_by.into_iter().map(|o| {
594 let mut o = o;
595 o.this = transform_recursive(o.this, transform_fn)?;
596 match transform_fn(Expression::Ordered(Box::new(o)))? {
597 Expression::Ordered(transformed) => Ok(*transformed),
598 _ => Err(crate::error::Error::parse("Ordered transformation returned non-Ordered expression")),
599 }
600 }).collect::<Result<Vec<_>>>()?;
601 Expression::WindowFunction(wf)
602 }
603 Expression::Alias(mut a) => {
604 a.this = transform_recursive(a.this, transform_fn)?;
605 Expression::Alias(a)
606 }
607 Expression::Cast(mut c) => {
608 c.this = transform_recursive(c.this, transform_fn)?;
609 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
610 c.to = transform_data_type_recursive(c.to, transform_fn)?;
611 Expression::Cast(c)
612 }
613 Expression::And(op) => transform_binary!(And, *op),
614 Expression::Or(op) => transform_binary!(Or, *op),
615 Expression::Add(op) => transform_binary!(Add, *op),
616 Expression::Sub(op) => transform_binary!(Sub, *op),
617 Expression::Mul(op) => transform_binary!(Mul, *op),
618 Expression::Div(op) => transform_binary!(Div, *op),
619 Expression::Eq(op) => transform_binary!(Eq, *op),
620 Expression::Lt(op) => transform_binary!(Lt, *op),
621 Expression::Gt(op) => transform_binary!(Gt, *op),
622 Expression::Paren(mut p) => {
623 p.this = transform_recursive(p.this, transform_fn)?;
624 Expression::Paren(p)
625 }
626 Expression::Coalesce(mut f) => {
627 f.expressions = f
628 .expressions
629 .into_iter()
630 .map(|e| transform_recursive(e, transform_fn))
631 .collect::<Result<Vec<_>>>()?;
632 Expression::Coalesce(f)
633 }
634 Expression::IfNull(mut f) => {
635 f.this = transform_recursive(f.this, transform_fn)?;
636 f.expression = transform_recursive(f.expression, transform_fn)?;
637 Expression::IfNull(f)
638 }
639 Expression::Nvl(mut f) => {
640 f.this = transform_recursive(f.this, transform_fn)?;
641 f.expression = transform_recursive(f.expression, transform_fn)?;
642 Expression::Nvl(f)
643 }
644 Expression::In(mut i) => {
645 i.this = transform_recursive(i.this, transform_fn)?;
646 i.expressions = i
647 .expressions
648 .into_iter()
649 .map(|e| transform_recursive(e, transform_fn))
650 .collect::<Result<Vec<_>>>()?;
651 if let Some(query) = i.query {
652 i.query = Some(transform_recursive(query, transform_fn)?);
653 }
654 Expression::In(i)
655 }
656 Expression::Not(mut n) => {
657 n.this = transform_recursive(n.this, transform_fn)?;
658 Expression::Not(n)
659 }
660 Expression::ArraySlice(mut s) => {
661 s.this = transform_recursive(s.this, transform_fn)?;
662 if let Some(start) = s.start {
663 s.start = Some(transform_recursive(start, transform_fn)?);
664 }
665 if let Some(end) = s.end {
666 s.end = Some(transform_recursive(end, transform_fn)?);
667 }
668 Expression::ArraySlice(s)
669 }
670 Expression::Subscript(mut s) => {
671 s.this = transform_recursive(s.this, transform_fn)?;
672 s.index = transform_recursive(s.index, transform_fn)?;
673 Expression::Subscript(s)
674 }
675 Expression::Array(mut a) => {
676 a.expressions = a.expressions.into_iter()
677 .map(|e| transform_recursive(e, transform_fn))
678 .collect::<Result<Vec<_>>>()?;
679 Expression::Array(a)
680 }
681 Expression::Struct(mut s) => {
682 let mut new_fields = Vec::new();
683 for (name, expr) in s.fields {
684 let transformed = transform_recursive(expr, transform_fn)?;
685 new_fields.push((name, transformed));
686 }
687 s.fields = new_fields;
688 Expression::Struct(s)
689 }
690 Expression::NamedArgument(mut na) => {
691 na.value = transform_recursive(na.value, transform_fn)?;
692 Expression::NamedArgument(na)
693 }
694 Expression::MapFunc(mut m) => {
695 m.keys = m.keys.into_iter()
696 .map(|e| transform_recursive(e, transform_fn))
697 .collect::<Result<Vec<_>>>()?;
698 m.values = m.values.into_iter()
699 .map(|e| transform_recursive(e, transform_fn))
700 .collect::<Result<Vec<_>>>()?;
701 Expression::MapFunc(m)
702 }
703 Expression::ArrayFunc(mut a) => {
704 a.expressions = a.expressions.into_iter()
705 .map(|e| transform_recursive(e, transform_fn))
706 .collect::<Result<Vec<_>>>()?;
707 Expression::ArrayFunc(a)
708 }
709 Expression::Lambda(mut l) => {
710 l.body = transform_recursive(l.body, transform_fn)?;
711 Expression::Lambda(l)
712 }
713 Expression::JsonExtract(mut f) => {
714 f.this = transform_recursive(f.this, transform_fn)?;
715 f.path = transform_recursive(f.path, transform_fn)?;
716 Expression::JsonExtract(f)
717 }
718 Expression::JsonExtractScalar(mut f) => {
719 f.this = transform_recursive(f.this, transform_fn)?;
720 f.path = transform_recursive(f.path, transform_fn)?;
721 Expression::JsonExtractScalar(f)
722 }
723
724 // ===== UnaryFunc-based expressions =====
725 // These all have a single `this: Expression` child
726 Expression::Length(mut f) => {
727 f.this = transform_recursive(f.this, transform_fn)?;
728 Expression::Length(f)
729 }
730 Expression::Upper(mut f) => {
731 f.this = transform_recursive(f.this, transform_fn)?;
732 Expression::Upper(f)
733 }
734 Expression::Lower(mut f) => {
735 f.this = transform_recursive(f.this, transform_fn)?;
736 Expression::Lower(f)
737 }
738 Expression::LTrim(mut f) => {
739 f.this = transform_recursive(f.this, transform_fn)?;
740 Expression::LTrim(f)
741 }
742 Expression::RTrim(mut f) => {
743 f.this = transform_recursive(f.this, transform_fn)?;
744 Expression::RTrim(f)
745 }
746 Expression::Reverse(mut f) => {
747 f.this = transform_recursive(f.this, transform_fn)?;
748 Expression::Reverse(f)
749 }
750 Expression::Abs(mut f) => {
751 f.this = transform_recursive(f.this, transform_fn)?;
752 Expression::Abs(f)
753 }
754 Expression::Ceil(mut f) => {
755 f.this = transform_recursive(f.this, transform_fn)?;
756 Expression::Ceil(f)
757 }
758 Expression::Floor(mut f) => {
759 f.this = transform_recursive(f.this, transform_fn)?;
760 Expression::Floor(f)
761 }
762 Expression::Sign(mut f) => {
763 f.this = transform_recursive(f.this, transform_fn)?;
764 Expression::Sign(f)
765 }
766 Expression::Sqrt(mut f) => {
767 f.this = transform_recursive(f.this, transform_fn)?;
768 Expression::Sqrt(f)
769 }
770 Expression::Cbrt(mut f) => {
771 f.this = transform_recursive(f.this, transform_fn)?;
772 Expression::Cbrt(f)
773 }
774 Expression::Ln(mut f) => {
775 f.this = transform_recursive(f.this, transform_fn)?;
776 Expression::Ln(f)
777 }
778 Expression::Exp(mut f) => {
779 f.this = transform_recursive(f.this, transform_fn)?;
780 Expression::Exp(f)
781 }
782 Expression::Date(mut f) => {
783 f.this = transform_recursive(f.this, transform_fn)?;
784 Expression::Date(f)
785 }
786 Expression::Stddev(mut f) => {
787 f.this = transform_recursive(f.this, transform_fn)?;
788 Expression::Stddev(f)
789 }
790 Expression::Variance(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 Expression::Variance(f)
793 }
794
795 // ===== BinaryFunc-based expressions =====
796 Expression::ModFunc(mut f) => {
797 f.this = transform_recursive(f.this, transform_fn)?;
798 f.expression = transform_recursive(f.expression, transform_fn)?;
799 Expression::ModFunc(f)
800 }
801 Expression::Power(mut f) => {
802 f.this = transform_recursive(f.this, transform_fn)?;
803 f.expression = transform_recursive(f.expression, transform_fn)?;
804 Expression::Power(f)
805 }
806 Expression::MapFromArrays(mut f) => {
807 f.this = transform_recursive(f.this, transform_fn)?;
808 f.expression = transform_recursive(f.expression, transform_fn)?;
809 Expression::MapFromArrays(f)
810 }
811 Expression::ElementAt(mut f) => {
812 f.this = transform_recursive(f.this, transform_fn)?;
813 f.expression = transform_recursive(f.expression, transform_fn)?;
814 Expression::ElementAt(f)
815 }
816 Expression::MapContainsKey(mut f) => {
817 f.this = transform_recursive(f.this, transform_fn)?;
818 f.expression = transform_recursive(f.expression, transform_fn)?;
819 Expression::MapContainsKey(f)
820 }
821 Expression::Left(mut f) => {
822 f.this = transform_recursive(f.this, transform_fn)?;
823 f.length = transform_recursive(f.length, transform_fn)?;
824 Expression::Left(f)
825 }
826 Expression::Right(mut f) => {
827 f.this = transform_recursive(f.this, transform_fn)?;
828 f.length = transform_recursive(f.length, transform_fn)?;
829 Expression::Right(f)
830 }
831 Expression::Repeat(mut f) => {
832 f.this = transform_recursive(f.this, transform_fn)?;
833 f.times = transform_recursive(f.times, transform_fn)?;
834 Expression::Repeat(f)
835 }
836
837 // ===== Complex function expressions =====
838 Expression::Substring(mut f) => {
839 f.this = transform_recursive(f.this, transform_fn)?;
840 f.start = transform_recursive(f.start, transform_fn)?;
841 if let Some(len) = f.length {
842 f.length = Some(transform_recursive(len, transform_fn)?);
843 }
844 Expression::Substring(f)
845 }
846 Expression::Replace(mut f) => {
847 f.this = transform_recursive(f.this, transform_fn)?;
848 f.old = transform_recursive(f.old, transform_fn)?;
849 f.new = transform_recursive(f.new, transform_fn)?;
850 Expression::Replace(f)
851 }
852 Expression::ConcatWs(mut f) => {
853 f.separator = transform_recursive(f.separator, transform_fn)?;
854 f.expressions = f.expressions.into_iter()
855 .map(|e| transform_recursive(e, transform_fn))
856 .collect::<Result<Vec<_>>>()?;
857 Expression::ConcatWs(f)
858 }
859 Expression::Trim(mut f) => {
860 f.this = transform_recursive(f.this, transform_fn)?;
861 if let Some(chars) = f.characters {
862 f.characters = Some(transform_recursive(chars, transform_fn)?);
863 }
864 Expression::Trim(f)
865 }
866 Expression::Split(mut f) => {
867 f.this = transform_recursive(f.this, transform_fn)?;
868 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
869 Expression::Split(f)
870 }
871 Expression::Lpad(mut f) => {
872 f.this = transform_recursive(f.this, transform_fn)?;
873 f.length = transform_recursive(f.length, transform_fn)?;
874 if let Some(fill) = f.fill {
875 f.fill = Some(transform_recursive(fill, transform_fn)?);
876 }
877 Expression::Lpad(f)
878 }
879 Expression::Rpad(mut f) => {
880 f.this = transform_recursive(f.this, transform_fn)?;
881 f.length = transform_recursive(f.length, transform_fn)?;
882 if let Some(fill) = f.fill {
883 f.fill = Some(transform_recursive(fill, transform_fn)?);
884 }
885 Expression::Rpad(f)
886 }
887
888 // ===== Conditional expressions =====
889 Expression::Case(mut c) => {
890 if let Some(operand) = c.operand {
891 c.operand = Some(transform_recursive(operand, transform_fn)?);
892 }
893 c.whens = c.whens.into_iter().map(|(cond, then)| {
894 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
895 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
896 (new_cond, new_then)
897 }).collect();
898 if let Some(else_expr) = c.else_ {
899 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
900 }
901 Expression::Case(c)
902 }
903 Expression::IfFunc(mut f) => {
904 f.condition = transform_recursive(f.condition, transform_fn)?;
905 f.true_value = transform_recursive(f.true_value, transform_fn)?;
906 if let Some(false_val) = f.false_value {
907 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
908 }
909 Expression::IfFunc(f)
910 }
911
912 // ===== Date/Time expressions =====
913 Expression::DateAdd(mut f) => {
914 f.this = transform_recursive(f.this, transform_fn)?;
915 f.interval = transform_recursive(f.interval, transform_fn)?;
916 Expression::DateAdd(f)
917 }
918 Expression::DateSub(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 f.interval = transform_recursive(f.interval, transform_fn)?;
921 Expression::DateSub(f)
922 }
923 Expression::DateDiff(mut f) => {
924 f.this = transform_recursive(f.this, transform_fn)?;
925 f.expression = transform_recursive(f.expression, transform_fn)?;
926 Expression::DateDiff(f)
927 }
928 Expression::DateTrunc(mut f) => {
929 f.this = transform_recursive(f.this, transform_fn)?;
930 Expression::DateTrunc(f)
931 }
932 Expression::Extract(mut f) => {
933 f.this = transform_recursive(f.this, transform_fn)?;
934 Expression::Extract(f)
935 }
936
937 // ===== JSON expressions =====
938 Expression::JsonObject(mut f) => {
939 f.pairs = f.pairs.into_iter().map(|(k, v)| {
940 let new_k = transform_recursive(k, transform_fn)?;
941 let new_v = transform_recursive(v, transform_fn)?;
942 Ok((new_k, new_v))
943 }).collect::<Result<Vec<_>>>()?;
944 Expression::JsonObject(f)
945 }
946
947 // ===== Subquery expressions =====
948 Expression::Subquery(mut s) => {
949 s.this = transform_recursive(s.this, transform_fn)?;
950 Expression::Subquery(s)
951 }
952 Expression::Exists(mut e) => {
953 e.this = transform_recursive(e.this, transform_fn)?;
954 Expression::Exists(e)
955 }
956
957 // ===== Set operations =====
958 Expression::Union(mut u) => {
959 u.left = transform_recursive(u.left, transform_fn)?;
960 u.right = transform_recursive(u.right, transform_fn)?;
961 Expression::Union(u)
962 }
963 Expression::Intersect(mut i) => {
964 i.left = transform_recursive(i.left, transform_fn)?;
965 i.right = transform_recursive(i.right, transform_fn)?;
966 Expression::Intersect(i)
967 }
968 Expression::Except(mut e) => {
969 e.left = transform_recursive(e.left, transform_fn)?;
970 e.right = transform_recursive(e.right, transform_fn)?;
971 Expression::Except(e)
972 }
973
974 // ===== DML expressions =====
975 Expression::Insert(mut ins) => {
976 // Transform VALUES clause expressions
977 let mut new_values = Vec::new();
978 for row in ins.values {
979 let mut new_row = Vec::new();
980 for e in row {
981 new_row.push(transform_recursive(e, transform_fn)?);
982 }
983 new_values.push(new_row);
984 }
985 ins.values = new_values;
986
987 // Transform query (for INSERT ... SELECT)
988 if let Some(query) = ins.query {
989 ins.query = Some(transform_recursive(query, transform_fn)?);
990 }
991
992 // Transform RETURNING clause
993 let mut new_returning = Vec::new();
994 for e in ins.returning {
995 new_returning.push(transform_recursive(e, transform_fn)?);
996 }
997 ins.returning = new_returning;
998
999 // Transform ON CONFLICT clause
1000 if let Some(on_conflict) = ins.on_conflict {
1001 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1002 }
1003
1004 Expression::Insert(ins)
1005 }
1006 Expression::Update(mut upd) => {
1007 upd.set = upd.set.into_iter().map(|(id, val)| {
1008 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1009 (id, new_val)
1010 }).collect();
1011 if let Some(mut where_clause) = upd.where_clause.take() {
1012 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1013 upd.where_clause = Some(where_clause);
1014 }
1015 Expression::Update(upd)
1016 }
1017 Expression::Delete(mut del) => {
1018 if let Some(mut where_clause) = del.where_clause.take() {
1019 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1020 del.where_clause = Some(where_clause);
1021 }
1022 Expression::Delete(del)
1023 }
1024
1025 // ===== CTE expressions =====
1026 Expression::With(mut w) => {
1027 w.ctes = w.ctes.into_iter().map(|mut cte| {
1028 let original = cte.this.clone();
1029 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1030 cte
1031 }).collect();
1032 Expression::With(w)
1033 }
1034 Expression::Cte(mut c) => {
1035 c.this = transform_recursive(c.this, transform_fn)?;
1036 Expression::Cte(c)
1037 }
1038
1039 // ===== Order expressions =====
1040 Expression::Ordered(mut o) => {
1041 o.this = transform_recursive(o.this, transform_fn)?;
1042 Expression::Ordered(o)
1043 }
1044
1045 // ===== Negation =====
1046 Expression::Neg(mut n) => {
1047 n.this = transform_recursive(n.this, transform_fn)?;
1048 Expression::Neg(n)
1049 }
1050
1051 // ===== Between =====
1052 Expression::Between(mut b) => {
1053 b.this = transform_recursive(b.this, transform_fn)?;
1054 b.low = transform_recursive(b.low, transform_fn)?;
1055 b.high = transform_recursive(b.high, transform_fn)?;
1056 Expression::Between(b)
1057 }
1058
1059 // ===== Like expressions =====
1060 Expression::Like(mut l) => {
1061 l.left = transform_recursive(l.left, transform_fn)?;
1062 l.right = transform_recursive(l.right, transform_fn)?;
1063 Expression::Like(l)
1064 }
1065 Expression::ILike(mut l) => {
1066 l.left = transform_recursive(l.left, transform_fn)?;
1067 l.right = transform_recursive(l.right, transform_fn)?;
1068 Expression::ILike(l)
1069 }
1070
1071 // ===== Additional binary ops not covered by macro =====
1072 Expression::Neq(op) => transform_binary!(Neq, *op),
1073 Expression::Lte(op) => transform_binary!(Lte, *op),
1074 Expression::Gte(op) => transform_binary!(Gte, *op),
1075 Expression::Mod(op) => transform_binary!(Mod, *op),
1076 Expression::Concat(op) => transform_binary!(Concat, *op),
1077 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1078 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1079 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1080 Expression::Is(op) => transform_binary!(Is, *op),
1081
1082 // ===== TryCast / SafeCast =====
1083 Expression::TryCast(mut c) => {
1084 c.this = transform_recursive(c.this, transform_fn)?;
1085 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1086 Expression::TryCast(c)
1087 }
1088 Expression::SafeCast(mut c) => {
1089 c.this = transform_recursive(c.this, transform_fn)?;
1090 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1091 Expression::SafeCast(c)
1092 }
1093
1094 // ===== Misc =====
1095 Expression::Unnest(mut f) => {
1096 f.this = transform_recursive(f.this, transform_fn)?;
1097 f.expressions = f.expressions.into_iter()
1098 .map(|e| transform_recursive(e, transform_fn))
1099 .collect::<Result<Vec<_>>>()?;
1100 Expression::Unnest(f)
1101 }
1102 Expression::Explode(mut f) => {
1103 f.this = transform_recursive(f.this, transform_fn)?;
1104 Expression::Explode(f)
1105 }
1106 Expression::GroupConcat(mut f) => {
1107 f.this = transform_recursive(f.this, transform_fn)?;
1108 Expression::GroupConcat(f)
1109 }
1110 Expression::StringAgg(mut f) => {
1111 f.this = transform_recursive(f.this, transform_fn)?;
1112 Expression::StringAgg(f)
1113 }
1114 Expression::ListAgg(mut f) => {
1115 f.this = transform_recursive(f.this, transform_fn)?;
1116 Expression::ListAgg(f)
1117 }
1118 Expression::ArrayAgg(mut f) => {
1119 f.this = transform_recursive(f.this, transform_fn)?;
1120 Expression::ArrayAgg(f)
1121 }
1122 Expression::ParseJson(mut f) => {
1123 f.this = transform_recursive(f.this, transform_fn)?;
1124 Expression::ParseJson(f)
1125 }
1126 Expression::ToJson(mut f) => {
1127 f.this = transform_recursive(f.this, transform_fn)?;
1128 Expression::ToJson(f)
1129 }
1130 Expression::JSONExtract(mut e) => {
1131 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1132 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1133 Expression::JSONExtract(e)
1134 }
1135 Expression::JSONExtractScalar(mut e) => {
1136 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1137 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1138 Expression::JSONExtractScalar(e)
1139 }
1140
1141 // StrToTime: recurse into this
1142 Expression::StrToTime(mut e) => {
1143 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1144 Expression::StrToTime(e)
1145 }
1146
1147 // UnixToTime: recurse into this
1148 Expression::UnixToTime(mut e) => {
1149 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1150 Expression::UnixToTime(e)
1151 }
1152
1153 // CreateTable: recurse into column defaults, on_update expressions, and data types
1154 Expression::CreateTable(mut ct) => {
1155 for col in &mut ct.columns {
1156 if let Some(default_expr) = col.default.take() {
1157 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1158 }
1159 if let Some(on_update_expr) = col.on_update.take() {
1160 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1161 }
1162 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1163 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1164 // contexts and may not produce correct results for DDL column definitions.
1165 // The DDL type mappings would need dedicated handling per source/target pair.
1166 }
1167 if let Some(as_select) = ct.as_select.take() {
1168 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1169 }
1170 Expression::CreateTable(ct)
1171 }
1172
1173 // CreateProcedure: recurse into body expressions
1174 Expression::CreateProcedure(mut cp) => {
1175 if let Some(body) = cp.body.take() {
1176 cp.body = Some(match body {
1177 FunctionBody::Expression(expr) => {
1178 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1179 }
1180 FunctionBody::Return(expr) => {
1181 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1182 }
1183 FunctionBody::Statements(stmts) => {
1184 let transformed_stmts = stmts
1185 .into_iter()
1186 .map(|s| transform_recursive(s, transform_fn))
1187 .collect::<Result<Vec<_>>>()?;
1188 FunctionBody::Statements(transformed_stmts)
1189 }
1190 other => other,
1191 });
1192 }
1193 Expression::CreateProcedure(cp)
1194 }
1195
1196 // CreateFunction: recurse into body expressions
1197 Expression::CreateFunction(mut cf) => {
1198 if let Some(body) = cf.body.take() {
1199 cf.body = Some(match body {
1200 FunctionBody::Expression(expr) => {
1201 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1202 }
1203 FunctionBody::Return(expr) => {
1204 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1205 }
1206 FunctionBody::Statements(stmts) => {
1207 let transformed_stmts = stmts
1208 .into_iter()
1209 .map(|s| transform_recursive(s, transform_fn))
1210 .collect::<Result<Vec<_>>>()?;
1211 FunctionBody::Statements(transformed_stmts)
1212 }
1213 other => other,
1214 });
1215 }
1216 Expression::CreateFunction(cf)
1217 }
1218
1219 // MemberOf: recurse into left and right operands
1220 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1221 // ArrayContainsAll (@>): recurse into left and right operands
1222 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1223 // ArrayContainedBy (<@): recurse into left and right operands
1224 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1225 // ArrayOverlaps (&&): recurse into left and right operands
1226 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1227 // TsMatch (@@): recurse into left and right operands
1228 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1229 // Adjacent (-|-): recurse into left and right operands
1230 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1231
1232 // Table: recurse into when (HistoricalData) and changes fields
1233 Expression::Table(mut t) => {
1234 if let Some(when) = t.when.take() {
1235 let transformed = transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1236 if let Expression::HistoricalData(hd) = transformed {
1237 t.when = Some(hd);
1238 }
1239 }
1240 if let Some(changes) = t.changes.take() {
1241 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1242 if let Expression::Changes(c) = transformed {
1243 t.changes = Some(c);
1244 }
1245 }
1246 Expression::Table(t)
1247 }
1248
1249 // HistoricalData (Snowflake time travel): recurse into expression
1250 Expression::HistoricalData(mut hd) => {
1251 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1252 Expression::HistoricalData(hd)
1253 }
1254
1255 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1256 Expression::Changes(mut c) => {
1257 if let Some(at_before) = c.at_before.take() {
1258 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1259 }
1260 if let Some(end) = c.end.take() {
1261 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1262 }
1263 Expression::Changes(c)
1264 }
1265
1266 // TableArgument: TABLE(expr) or MODEL(expr)
1267 Expression::TableArgument(mut ta) => {
1268 ta.this = transform_recursive(ta.this, transform_fn)?;
1269 Expression::TableArgument(ta)
1270 }
1271
1272 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1273 Expression::JoinedTable(mut jt) => {
1274 jt.left = transform_recursive(jt.left, transform_fn)?;
1275 for join in &mut jt.joins {
1276 join.this = transform_recursive(std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)), transform_fn)?;
1277 if let Some(on) = join.on.take() {
1278 join.on = Some(transform_recursive(on, transform_fn)?);
1279 }
1280 }
1281 Expression::JoinedTable(jt)
1282 }
1283
1284 // Lateral: LATERAL func() - recurse into the function expression
1285 Expression::Lateral(mut lat) => {
1286 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1287 Expression::Lateral(lat)
1288 }
1289
1290 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1291 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1292 // as a unit together with the WithinGroup wrapper
1293 Expression::WithinGroup(mut wg) => {
1294 wg.order_by = wg.order_by.into_iter().map(|mut o| {
1295 let original = o.this.clone();
1296 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1297 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1298 Ok(Expression::Ordered(transformed)) => *transformed,
1299 Ok(_) | Err(_) => o,
1300 }
1301 }).collect();
1302 Expression::WithinGroup(wg)
1303 }
1304
1305 // Filter: recurse into both the aggregate and the filter condition
1306 Expression::Filter(mut f) => {
1307 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1308 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1309 Expression::Filter(f)
1310 }
1311
1312 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1313 Expression::BitwiseOrAgg(mut f) => {
1314 f.this = transform_recursive(f.this, transform_fn)?;
1315 Expression::BitwiseOrAgg(f)
1316 }
1317 Expression::BitwiseAndAgg(mut f) => {
1318 f.this = transform_recursive(f.this, transform_fn)?;
1319 Expression::BitwiseAndAgg(f)
1320 }
1321 Expression::BitwiseXorAgg(mut f) => {
1322 f.this = transform_recursive(f.this, transform_fn)?;
1323 Expression::BitwiseXorAgg(f)
1324 }
1325 Expression::PipeOperator(mut pipe) => {
1326 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1327 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1328 Expression::PipeOperator(pipe)
1329 }
1330
1331 // Pass through leaf nodes unchanged
1332 other => other,
1333 };
1334
1335 // Then apply the transform function
1336 transform_fn(expr)
1337}
1338
1339/// Returns the tokenizer config, generator config, and expression transform closure
1340/// for a built-in dialect type. This is the shared implementation used by both
1341/// `Dialect::get()` and custom dialect construction.
1342fn configs_for_dialect_type(
1343 dt: DialectType,
1344) -> (
1345 TokenizerConfig,
1346 GeneratorConfig,
1347 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
1348) {
1349 macro_rules! dialect_configs {
1350 ($dialect_struct:ident) => {{
1351 let d = $dialect_struct;
1352 (
1353 d.tokenizer_config(),
1354 d.generator_config(),
1355 Box::new(move |e| $dialect_struct.transform_expr(e)),
1356 )
1357 }};
1358 }
1359 match dt {
1360 DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
1361 DialectType::MySQL => dialect_configs!(MySQLDialect),
1362 DialectType::BigQuery => dialect_configs!(BigQueryDialect),
1363 DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
1364 DialectType::DuckDB => dialect_configs!(DuckDBDialect),
1365 DialectType::TSQL => dialect_configs!(TSQLDialect),
1366 DialectType::Oracle => dialect_configs!(OracleDialect),
1367 DialectType::Hive => dialect_configs!(HiveDialect),
1368 DialectType::Spark => dialect_configs!(SparkDialect),
1369 DialectType::SQLite => dialect_configs!(SQLiteDialect),
1370 DialectType::Presto => dialect_configs!(PrestoDialect),
1371 DialectType::Trino => dialect_configs!(TrinoDialect),
1372 DialectType::Redshift => dialect_configs!(RedshiftDialect),
1373 DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
1374 DialectType::Databricks => dialect_configs!(DatabricksDialect),
1375 DialectType::Athena => dialect_configs!(AthenaDialect),
1376 DialectType::Teradata => dialect_configs!(TeradataDialect),
1377 DialectType::Doris => dialect_configs!(DorisDialect),
1378 DialectType::StarRocks => dialect_configs!(StarRocksDialect),
1379 DialectType::Materialize => dialect_configs!(MaterializeDialect),
1380 DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
1381 DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
1382 DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
1383 DialectType::TiDB => dialect_configs!(TiDBDialect),
1384 DialectType::Druid => dialect_configs!(DruidDialect),
1385 DialectType::Solr => dialect_configs!(SolrDialect),
1386 DialectType::Tableau => dialect_configs!(TableauDialect),
1387 DialectType::Dune => dialect_configs!(DuneDialect),
1388 DialectType::Fabric => dialect_configs!(FabricDialect),
1389 DialectType::Drill => dialect_configs!(DrillDialect),
1390 DialectType::Dremio => dialect_configs!(DremioDialect),
1391 DialectType::Exasol => dialect_configs!(ExasolDialect),
1392 DialectType::DataFusion => dialect_configs!(DataFusionDialect),
1393 _ => dialect_configs!(GenericDialect),
1394 }
1395}
1396
1397// ---------------------------------------------------------------------------
1398// Custom dialect registry
1399// ---------------------------------------------------------------------------
1400
/// Global registry mapping custom dialect names to their configurations.
/// Guarded by an `RwLock` so concurrent lookups (reads) don't block each other;
/// values are `Arc`-shared so a lookup returns a cheap handle, not a copy.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1403
/// Immutable configuration of a registered custom dialect, as produced by
/// `CustomDialectBuilder::register` and stored in `CUSTOM_DIALECT_REGISTRY`.
struct CustomDialectConfig {
    /// Registry key under which this dialect was registered.
    name: String,
    /// Built-in dialect whose parser behavior (and, absent an override,
    /// expression transform) this custom dialect inherits.
    base_dialect: DialectType,
    /// Tokenizer config inherited from the base, after the builder's modifier.
    tokenizer_config: TokenizerConfig,
    /// Generator config inherited from the base, after the builder's modifier.
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform replacing the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess replacing the base dialect's built-in one.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1412
1413/// Fluent builder for creating and registering custom SQL dialects.
1414///
1415/// A custom dialect is based on an existing built-in dialect and allows selective
1416/// overrides of tokenizer configuration, generator configuration, and expression
1417/// transforms.
1418///
1419/// # Example
1420///
1421/// ```rust,ignore
1422/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1423/// use polyglot_sql::generator::NormalizeFunctions;
1424///
1425/// CustomDialectBuilder::new("my_postgres")
1426/// .based_on(DialectType::PostgreSQL)
1427/// .generator_config_modifier(|gc| {
1428/// gc.normalize_functions = NormalizeFunctions::Lower;
1429/// })
1430/// .register()
1431/// .unwrap();
1432///
1433/// let d = Dialect::get_by_name("my_postgres").unwrap();
1434/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1435/// let sql = d.generate(&exprs[0]).unwrap();
1436/// assert_eq!(sql, "select count(*)");
1437///
1438/// polyglot_sql::unregister_custom_dialect("my_postgres");
1439/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect to inherit tokenizer/generator configuration from.
    base_dialect: DialectType,
    /// One-shot edit applied to the inherited tokenizer config at `register()`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot edit applied to the inherited generator config at `register()`.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform replacing the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess replacing the base dialect's built-in one.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1448
1449impl CustomDialectBuilder {
1450 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1451 pub fn new(name: impl Into<String>) -> Self {
1452 Self {
1453 name: name.into(),
1454 base_dialect: DialectType::Generic,
1455 tokenizer_modifier: None,
1456 generator_modifier: None,
1457 transform: None,
1458 preprocess: None,
1459 }
1460 }
1461
1462 /// Set the base built-in dialect to inherit configuration from.
1463 pub fn based_on(mut self, dialect: DialectType) -> Self {
1464 self.base_dialect = dialect;
1465 self
1466 }
1467
1468 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1469 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1470 where
1471 F: FnOnce(&mut TokenizerConfig) + 'static,
1472 {
1473 self.tokenizer_modifier = Some(Box::new(f));
1474 self
1475 }
1476
1477 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1478 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1479 where
1480 F: FnOnce(&mut GeneratorConfig) + 'static,
1481 {
1482 self.generator_modifier = Some(Box::new(f));
1483 self
1484 }
1485
1486 /// Set a custom per-node expression transform function.
1487 ///
1488 /// This replaces the base dialect's transform. It is called on every expression
1489 /// node during the recursive transform pass.
1490 pub fn transform_fn<F>(mut self, f: F) -> Self
1491 where
1492 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1493 {
1494 self.transform = Some(Arc::new(f));
1495 self
1496 }
1497
1498 /// Set a custom whole-tree preprocessing function.
1499 ///
1500 /// This replaces the base dialect's built-in preprocessing. It is called once
1501 /// on the entire expression tree before the recursive per-node transform.
1502 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1503 where
1504 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1505 {
1506 self.preprocess = Some(Arc::new(f));
1507 self
1508 }
1509
1510 /// Build the custom dialect configuration and register it in the global registry.
1511 ///
1512 /// Returns an error if:
1513 /// - The name collides with a built-in dialect name
1514 /// - A custom dialect with the same name is already registered
1515 pub fn register(self) -> Result<()> {
1516 // Reject names that collide with built-in dialects
1517 if DialectType::from_str(&self.name).is_ok() {
1518 return Err(crate::error::Error::parse(format!(
1519 "Cannot register custom dialect '{}': name collides with built-in dialect",
1520 self.name
1521 )));
1522 }
1523
1524 // Get base configs
1525 let (mut tok_config, mut gen_config, _base_transform) =
1526 configs_for_dialect_type(self.base_dialect);
1527
1528 // Apply modifiers
1529 if let Some(tok_mod) = self.tokenizer_modifier {
1530 tok_mod(&mut tok_config);
1531 }
1532 if let Some(gen_mod) = self.generator_modifier {
1533 gen_mod(&mut gen_config);
1534 }
1535
1536 let config = CustomDialectConfig {
1537 name: self.name.clone(),
1538 base_dialect: self.base_dialect,
1539 tokenizer_config: tok_config,
1540 generator_config: gen_config,
1541 transform: self.transform,
1542 preprocess: self.preprocess,
1543 };
1544
1545 register_custom_dialect(config)
1546 }
1547}
1548
1549use std::str::FromStr;
1550
1551fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1552 let mut registry = CUSTOM_DIALECT_REGISTRY
1553 .write()
1554 .map_err(|e| crate::error::Error::parse(format!("Registry lock poisoned: {}", e)))?;
1555
1556 if registry.contains_key(&config.name) {
1557 return Err(crate::error::Error::parse(format!(
1558 "Custom dialect '{}' is already registered",
1559 config.name
1560 )));
1561 }
1562
1563 registry.insert(config.name.clone(), Arc::new(config));
1564 Ok(())
1565}
1566
1567/// Remove a custom dialect from the global registry.
1568///
1569/// Returns `true` if a dialect with that name was found and removed,
1570/// `false` if no such custom dialect existed.
1571pub fn unregister_custom_dialect(name: &str) -> bool {
1572 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1573 registry.remove(name).is_some()
1574 } else {
1575 false
1576 }
1577}
1578
1579fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1580 CUSTOM_DIALECT_REGISTRY
1581 .read()
1582 .ok()
1583 .and_then(|registry| registry.get(name).cloned())
1584}
1585
1586/// Main entry point for dialect-specific SQL operations.
1587///
1588/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
1589/// transformer for a specific SQL database engine. It is the high-level API through
1590/// which callers parse, generate, transform, and transpile SQL.
1591///
1592/// # Usage
1593///
1594/// ```rust,ignore
1595/// use polyglot_sql::dialects::{Dialect, DialectType};
1596///
1597/// // Parse PostgreSQL SQL into an AST
1598/// let pg = Dialect::get(DialectType::PostgreSQL);
1599/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
1600///
1601/// // Transpile from PostgreSQL to BigQuery
1602/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
1603/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
1604/// ```
1605///
1606/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
1607/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The built-in dialect this instance was created for; for a custom
    /// dialect, this is its base dialect (used for parser routing).
    dialect_type: DialectType,
    /// Tokenizer pre-configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Default generator configuration used when rendering SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during the transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1618
1619impl Dialect {
1620 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1621 ///
1622 /// This is the primary constructor. It initializes the tokenizer, generator config,
1623 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1624 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1625 /// config routing.
1626 pub fn get(dialect_type: DialectType) -> Self {
1627 let (tokenizer_config, generator_config, transformer) = configs_for_dialect_type(dialect_type);
1628
1629 // Set up expression-specific generator config for hybrid dialects
1630 let generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>> = match dialect_type {
1631 DialectType::Athena => Some(Box::new(|expr| AthenaDialect.generator_config_for_expr(expr))),
1632 _ => None,
1633 };
1634
1635 Self {
1636 dialect_type,
1637 tokenizer: Tokenizer::new(tokenizer_config),
1638 generator_config,
1639 transformer,
1640 generator_config_for_expr,
1641 custom_preprocess: None,
1642 }
1643 }
1644
1645 /// Look up a dialect by string name.
1646 ///
1647 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1648 /// falls back to the custom dialect registry. Returns `None` if no dialect
1649 /// with the given name exists.
1650 pub fn get_by_name(name: &str) -> Option<Self> {
1651 // Try built-in first
1652 if let Ok(dt) = DialectType::from_str(name) {
1653 return Some(Self::get(dt));
1654 }
1655
1656 // Try custom registry
1657 let config = get_custom_dialect_config(name)?;
1658 Some(Self::from_custom_config(&config))
1659 }
1660
1661 /// Construct a `Dialect` from a custom dialect configuration.
1662 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1663 // Build the transformer: use custom if provided, else use base dialect's
1664 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1665 if let Some(ref custom_transform) = config.transform {
1666 let t = Arc::clone(custom_transform);
1667 Box::new(move |e| t(e))
1668 } else {
1669 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1670 base_transform
1671 };
1672
1673 // Build the custom preprocess: use custom if provided
1674 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1675 config.preprocess.as_ref().map(|p| {
1676 let p = Arc::clone(p);
1677 Box::new(move |e: Expression| p(e)) as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1678 });
1679
1680 Self {
1681 dialect_type: config.base_dialect,
1682 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1683 generator_config: config.generator_config.clone(),
1684 transformer,
1685 generator_config_for_expr: None,
1686 custom_preprocess,
1687 }
1688 }
1689
    /// Returns the [`DialectType`] this instance was configured for.
    ///
    /// For dialects built via `from_custom_config` this is the *base* dialect
    /// type, not the custom dialect's registered name.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1694
    /// Returns this dialect's static generator configuration.
    ///
    /// Note: hybrid dialects may override this per expression (see
    /// `get_config_for_expr`); this accessor returns only the static base config.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1699
1700 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1701 ///
1702 /// The input may contain multiple semicolon-separated statements; each one
1703 /// produces a separate element in the returned vector. Tokenization uses
1704 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1705 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1706 let tokens = self.tokenizer.tokenize(sql)?;
1707 let config = crate::parser::ParserConfig {
1708 dialect: Some(self.dialect_type),
1709 ..Default::default()
1710 };
1711 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1712 parser.parse()
1713 }
1714
1715 /// Get the generator config for a specific expression (supports hybrid dialects)
1716 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1717 if let Some(ref config_fn) = self.generator_config_for_expr {
1718 config_fn(expr)
1719 } else {
1720 self.generator_config.clone()
1721 }
1722 }
1723
1724 /// Generates a SQL string from an [`Expression`] AST node.
1725 ///
1726 /// The output uses this dialect's generator configuration for identifier quoting,
1727 /// keyword casing, function name normalization, and syntax style. The result is
1728 /// a single-line (non-pretty) SQL string.
1729 pub fn generate(&self, expr: &Expression) -> Result<String> {
1730 let config = self.get_config_for_expr(expr);
1731 let mut generator = Generator::with_config(config);
1732 generator.generate(expr)
1733 }
1734
1735 /// Generate SQL from an expression with pretty printing enabled
1736 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1737 let mut config = self.get_config_for_expr(expr);
1738 config.pretty = true;
1739 let mut generator = Generator::with_config(config);
1740 generator.generate(expr)
1741 }
1742
1743 /// Generate SQL from an expression with forced identifier quoting (identify=True)
1744 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
1745 let mut config = self.get_config_for_expr(expr);
1746 config.always_quote_identifiers = true;
1747 let mut generator = Generator::with_config(config);
1748 generator.generate(expr)
1749 }
1750
1751 /// Generate SQL from an expression with pretty printing and forced identifier quoting
1752 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
1753 let mut config = self.generator_config.clone();
1754 config.pretty = true;
1755 config.always_quote_identifiers = true;
1756 let mut generator = Generator::with_config(config);
1757 generator.generate(expr)
1758 }
1759
1760 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
1761 ///
1762 /// The transformation proceeds in two phases:
1763 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
1764 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
1765 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
1766 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
1767 ///
1768 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
1769 /// and for identity transforms (normalizing SQL within the same dialect).
1770 pub fn transform(&self, expr: Expression) -> Result<Expression> {
1771 // Apply preprocessing transforms based on dialect
1772 let preprocessed = self.preprocess(expr)?;
1773 // Then apply recursive transformation
1774 transform_recursive(preprocessed, &self.transformer)
1775 }
1776
    /// Apply dialect-specific whole-tree preprocessing transforms.
    ///
    /// This is phase 1 of [`Dialect::transform`]. A custom preprocess hook (set
    /// via `from_custom_config`) completely replaces the built-in per-dialect
    /// logic below. The built-in passes for each dialect run in a fixed order;
    /// the order matters because later passes may depend on constructs produced
    /// (or removed) by earlier ones.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake: no WINDOW clause; projected EXPLODEs become FLATTEN
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY or WINDOW clause
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            DialectType::DataFusion => Ok(expr),
            // Oracle - no special preprocessing needed
            DialectType::Oracle => {
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            DialectType::Drill => {
                Ok(expr)
            }
            // Teradata - no special preprocessing needed
            DialectType::Teradata => {
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
1894
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` with this (source) dialect, rewrites each statement's AST
    /// for `target`, and renders one single-line SQL string per statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
1899
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Same pipeline as [`transpile_to`](Self::transpile_to), but the target
    /// dialect renders each statement with pretty formatting.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
1904
    /// Shared implementation of `transpile_to` / `transpile_to_pretty`.
    ///
    /// Per-statement pipeline: parse with the source dialect, normalize the
    /// source AST (source-side transform plus a series of source/target-aware
    /// rewrites), apply the target dialect's transform, then render with the
    /// target's generator. The intermediate rewrites are order-sensitive —
    /// see the inline comments on each step before reordering anything.
    fn transpile_to_inner(&self, sql: &str, target: DialectType, pretty: bool) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);

        expressions
            .into_iter()
            .map(|expr| {
                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized = if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                    self.transform(expr)?
                } else {
                    expr
                };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized = if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                if let (Expression::Function(ref jq), Expression::Function(ref jv)) = (&f.args[0], &f.args[1]) {
                                    if jq.name.eq_ignore_ascii_case("JSON_QUERY") && jv.name.eq_ignore_ascii_case("JSON_VALUE") {
                                        // Unwrap: return just JSON_QUERY(...)
                                        return Ok(f.args[0].clone());
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized = Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena
                        | DialectType::Spark | DialectType::Databricks)
                {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL | DialectType::Redshift)
                    && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena)
                {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized = crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // BigQuery GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::Snowflake)
                {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Finally: target-side transform, then render with the target's generator.
                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty(&transformed)?
                } else {
                    target_dialect.generate(&transformed)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2038
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    ///   SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    ///   SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    ///   FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Only the first matching join per SELECT is rewritten (the search breaks
    /// after the first hit). If the interval unit cannot be determined, the
    /// SELECT is left unchanged.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Only SELECT nodes are of interest; everything else passes through.
            let Expression::Select(mut sel) = e else { return Ok(e); };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            if matches!(upper.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY" | "HOUR" | "MINUTE" | "SECOND") {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Rewrite only the first matching join.
                if gda_info.is_some() { break; }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                return Ok(Expression::Select(sel));
            };
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            // The (n + 1 - 1) + 1 shape is deliberate: it matches the expected
            // Snowflake rendering documented in the function header above.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let paren_inner = Expression::Paren(Box::new(Paren { this: minus_one, trailing_comments: vec![] }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![Expression::Literal(Literal::Number("0".to_string())), outer_plus_one],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The original UNNEST alias takes the 5th column slot (Snowflake
            // FLATTEN's VALUE position).
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column { name: Identifier::new(&alias_name), table: None, join_mark: false, trailing_comments: vec![] }),
                        to: DataType::Int { length: None, integer_spelling: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel.expressions.iter().map(|expr| {
                Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr)
            }).collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2235
2236 /// Helper: replace column references to `alias_name` with dateadd expression
2237 fn replace_column_ref_with_dateadd(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2238 use crate::expressions::*;
2239 match expr {
2240 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2241 // Plain column reference -> DATEADD(...) AS alias_name
2242 Expression::Alias(Box::new(Alias {
2243 this: dateadd.clone(),
2244 alias: Identifier::new(alias_name),
2245 column_aliases: vec![],
2246 pre_alias_comments: vec![],
2247 trailing_comments: vec![],
2248 }))
2249 }
2250 Expression::Alias(a) => {
2251 // Check if the inner expression references the alias
2252 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2253 Expression::Alias(Box::new(Alias {
2254 this: new_this,
2255 alias: a.alias.clone(),
2256 column_aliases: a.column_aliases.clone(),
2257 pre_alias_comments: a.pre_alias_comments.clone(),
2258 trailing_comments: a.trailing_comments.clone(),
2259 }))
2260 }
2261 _ => expr.clone(),
2262 }
2263 }
2264
2265 /// Helper: replace column references in inner expression (not top-level)
2266 fn replace_column_ref_inner(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2267 use crate::expressions::*;
2268 match expr {
2269 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2270 dateadd.clone()
2271 }
2272 Expression::Add(op) => {
2273 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2274 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2275 Expression::Add(Box::new(BinaryOp {
2276 left, right,
2277 left_comments: op.left_comments.clone(),
2278 operator_comments: op.operator_comments.clone(),
2279 trailing_comments: op.trailing_comments.clone(),
2280 }))
2281 }
2282 Expression::Sub(op) => {
2283 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2284 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2285 Expression::Sub(Box::new(BinaryOp {
2286 left, right,
2287 left_comments: op.left_comments.clone(),
2288 operator_comments: op.operator_comments.clone(),
2289 trailing_comments: op.trailing_comments.clone(),
2290 }))
2291 }
2292 Expression::Mul(op) => {
2293 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2294 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2295 Expression::Mul(Box::new(BinaryOp {
2296 left, right,
2297 left_comments: op.left_comments.clone(),
2298 operator_comments: op.operator_comments.clone(),
2299 trailing_comments: op.trailing_comments.clone(),
2300 }))
2301 }
2302 _ => expr.clone(),
2303 }
2304 }
2305
2306 fn normalize_snowflake_pretty(mut sql: String) -> String {
2307 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
2308 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
2309 {
2310 sql = sql.replace(
2311 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
2312 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
2313 );
2314
2315 sql = sql.replace(
2316 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
2317 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
2318 );
2319
2320 sql = sql.replace(
2321 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
2322 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
2323 );
2324 }
2325
2326 sql
2327 }
2328
2329 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
2330 /// This handles cases where the same syntax has different semantics across dialects.
2331 fn cross_dialect_normalize(expr: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
2332 use crate::expressions::{AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc, Function, Identifier, IsNull, Literal, Null, Paren};
2333
2334 // Helper to tag which kind of transform to apply
2335 #[derive(Debug)]
2336 enum Action {
2337 None,
2338 GreatestLeastNull,
2339 ArrayGenerateRange,
2340 Div0TypedDivision,
2341 ArrayAggCollectList,
2342 ArrayAggWithinGroupFilter,
2343 ArrayAggFilter,
2344 CastTimestampToDatetime,
2345 DateTruncWrapCast,
2346 ToDateToCast,
2347 ConvertTimezoneToExpr,
2348 SetToVariable,
2349 RegexpReplaceSnowflakeToDuckDB,
2350 BigQueryFunctionNormalize,
2351 BigQuerySafeDivide,
2352 BigQueryCastType,
2353 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
2354 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
2355 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
2356 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
2357 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
2358 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
2359 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
2360 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
2361 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
2362 EpochConvert, // Expression::Epoch -> target-specific epoch function
2363 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
2364 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
2365 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
2366 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
2367 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
2368 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
2369 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
2370 TempTableHash, // TSQL #table -> temp table normalization
2371 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
2372 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
2373 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
2374 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
2375 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
2376 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
2377 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2378 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2379 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
2380 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
2381 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
2382 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
2383 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
2384 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
2385 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
2386 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
2387 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
2388 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
2389 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
2390 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
2391 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
2392 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
2393 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
2394 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
2395 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
2396 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
2397 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
2398 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
2399 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
2400 DollarParamConvert, // $foo -> @foo for BigQuery
2401 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
2402 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
2403 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
2404 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
2405 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
2406 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
2407 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
2408 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
2409 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
2410 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
2411 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
2412 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
2413 RespectNullsConvert, // RESPECT NULLS window function handling
2414 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
2415 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
2416 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
2417 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
2418 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
2419 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
2420 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
2421 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
2422 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
2423 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
2424 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
2425 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
2426 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
2427 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
2428 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
2429 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
2430 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
2431 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
2432 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
2433 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
2434 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
2435 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
2436 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
2437 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
2438 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
2439 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
2440 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
2441 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
2442 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
2443 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
2444 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
2445 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
2446 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
2447 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
2448 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
2449 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
2450 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
2451 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
2452 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
2453 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
2454 }
2455
        // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
        // Only TSQL-family dialects (TSQL, Fabric) produce SELECT ... INTO, so the
        // rewrite is gated on the *source* dialect; all others pass through untouched.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };

        // Strip OFFSET ROWS for non-TSQL/Oracle targets.
        // Clearing `offset.rows` drops the `ROWS` keyword from an `OFFSET n ROWS`
        // clause (presumably the generator then emits a bare `OFFSET n` — TODO confirm
        // against the generator). Note this only touches a top-level Select, not
        // Selects nested inside unions or subqueries.
        let expr = if !matches!(target, DialectType::TSQL | DialectType::Oracle | DialectType::Fabric) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
2476
2477 // Handle CreateTable WITH properties transformation before recursive transforms
        // Handle CreateTable WITH properties transformation before recursive transforms
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                // (i.e. `PARTITIONED BY (y INT)` rather than `PARTITIONED BY (y)`).
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                // Hive targets keep the original form, so only rewrite for non-Hive.
                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties.retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        // Rendered as a pre-formatted string value, not an AST node.
                        let array_elements: Vec<String> = partition_col_names.iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties.push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names.iter()
                            .map(|n| Expression::Column(crate::expressions::Column {
                                name: crate::expressions::Identifier::new(n.clone()),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                            }))
                            .collect();
                        ct.properties.insert(0, Expression::PartitionedByProperty(Box::new(
                            crate::expressions::PartitionedByProperty {
                                this: Box::new(Expression::Tuple(Box::new(crate::expressions::Tuple { expressions: name_exprs }))),
                            }
                        )));
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }
2542
            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                ct.properties.retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY constraints but strip TSQL-specific modifiers; remove all others
            if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                ct.constraints.retain(|c| matches!(c, crate::expressions::TableConstraint::PrimaryKey { .. }));
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey { columns, modifiers, .. } = constraint {
                        // Strip ASC/DESC from column names.
                        // NOTE(review): sort direction appears to be stored as a text
                        // suffix on the column name (" ASC" = 4 chars, " DESC" = 5).
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers (CLUSTERED, WITH (...), ON filegroup)
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    // `integer_spelling` toggles INTEGER vs INT at generation time.
                    if let crate::expressions::DataType::Int { integer_spelling, .. } = &mut col.data_type {
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }
2602
            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            // (`CREATE TABLE ... ON [PRIMARY]` has no equivalent elsewhere).
            if ct.on_property.is_some() && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
2611 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
2612 if let crate::expressions::DataType::Array { .. } = dt {
2613 *dt = crate::expressions::DataType::Custom { name: "ARRAY".to_string() };
2614 }
2615 }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        // NOTE(review): the closure parameter `ct` shadows the outer
                        // CreateTable binding of the same name; it is a &ConstraintType here.
                        let has_explicit_not_null = col.constraint_order.iter().any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
2660
2661 // Handle CreateView column stripping for Presto/Trino target
2662 let expr = if let Expression::CreateView(mut cv) = expr {
2663 // Presto/Trino: drop column list when view has a SELECT body
2664 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty() {
2665 if !matches!(&cv.query, Expression::Null(_)) {
2666 cv.columns.clear();
2667 }
2668 }
2669 Expression::CreateView(cv)
2670 } else {
2671 expr
2672 };
2673
        // Apply expression-level rewrites bottom-up over the whole tree; each section
        // below pattern-matches one construct and returns a replacement node, otherwise
        // falls through to the next section.
        transform_recursive(expr, &|e| {
            // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
            // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Cast(ref c) = e {
                    // Check if this is a CAST of an array to a struct array type
                    let is_struct_array_cast = matches!(&c.to, crate::expressions::DataType::Array { .. });
                    if is_struct_array_cast {
                        // "Auto-named" = every field name is absent or of the form `_<digits>`,
                        // i.e. names synthesized by the parser rather than user-supplied.
                        let has_auto_named_structs = match &c.this {
                            Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
                                    })
                                } else { false }
                            }),
                            Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
                                    })
                                } else { false }
                            }),
                            _ => false,
                        };
                        if has_auto_named_structs {
                            // Drop the synthesized names and keep positional values: ROW(x, y)
                            let convert_struct_to_row = |elem: Expression| -> Expression {
                                if let Expression::Struct(s) = elem {
                                    let row_args: Vec<Expression> = s.fields.into_iter().map(|(_, v)| v).collect();
                                    Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)))
                                } else {
                                    elem
                                }
                            };
                            let mut c_clone = c.as_ref().clone();
                            match &mut c_clone.this {
                                Expression::Array(arr) => {
                                    arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
                                }
                                Expression::ArrayFunc(arr) => {
                                    arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
                                }
                                _ => {}
                            }
                            return Ok(Expression::Cast(Box::new(c_clone)));
                        }
                    }
                }
            }
2723
            // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
            // The Select's `kind` field carries the AS STRUCT/AS VALUE modifier.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Select(ref sel) = e {
                    if sel.kind.as_deref() == Some("STRUCT") {
                        let mut fields = Vec::new();
                        for expr in &sel.expressions {
                            match expr {
                                // `expr AS name` -> field keyed by the alias
                                Expression::Alias(a) => {
                                    fields.push((Some(a.alias.name.clone()), a.this.clone()));
                                }
                                // bare column -> field keyed by the column name
                                Expression::Column(c) => {
                                    fields.push((Some(c.name.name.clone()), expr.clone()));
                                }
                                // anything else keeps a positional (unnamed) field
                                _ => {
                                    fields.push((None, expr.clone()));
                                }
                            }
                        }
                        let struct_lit = Expression::Struct(Box::new(crate::expressions::Struct { fields }));
                        // Replace the projection list with the single struct literal
                        // and clear the AS STRUCT marker.
                        let mut new_select = sel.as_ref().clone();
                        new_select.kind = None;
                        new_select.expressions = vec![struct_lit];
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
2750
            // Convert @variable -> ${variable} for Spark/Hive/Databricks
            // TSQL variables may be parsed either as Parameter(style=At) or, depending
            // on parse context, as a Column whose name still carries the '@' prefix;
            // both shapes are normalized here to Parameter(style=DollarBrace).
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
            {
                if let Expression::Parameter(ref p) = e {
                    if p.style == crate::expressions::ParameterStyle::At {
                        if let Some(ref name) = p.name {
                            return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
                                name: Some(name.clone()),
                                index: p.index,
                                style: crate::expressions::ParameterStyle::DollarBrace,
                                quoted: p.quoted,
                                expression: None,
                            })));
                        }
                    }
                }
                // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
                if let Expression::Column(ref col) = e {
                    if col.name.name.starts_with('@') && col.table.is_none() {
                        let var_name = col.name.name.trim_start_matches('@').to_string();
                        return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
                            name: Some(var_name),
                            index: None,
                            style: crate::expressions::ParameterStyle::DollarBrace,
                            quoted: false,
                            expression: None,
                        })));
                    }
                }
            }
2782
            // Convert @variable -> variable in SET statements for Spark/Databricks
            // The SET target may arrive as a Parameter, Identifier, or Column depending
            // on which parser path produced it; all three are normalized to a bare
            // Identifier with the '@' removed. `changed` avoids rebuilding the node
            // when nothing matched.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                if let Expression::SetStatement(ref s) = e {
                    let mut new_items = s.items.clone();
                    let mut changed = false;
                    for item in &mut new_items {
                        // Strip @ from the SET name (Parameter style)
                        if let Expression::Parameter(ref p) = item.name {
                            if p.style == crate::expressions::ParameterStyle::At {
                                if let Some(ref name) = p.name {
                                    item.name = Expression::Identifier(Identifier::new(name));
                                    changed = true;
                                }
                            }
                        }
                        // Strip @ from the SET name (Identifier style - SET parser)
                        if let Expression::Identifier(ref id) = item.name {
                            if id.name.starts_with('@') {
                                let var_name = id.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                        // Strip @ from the SET name (Column style - alternative parsing)
                        if let Expression::Column(ref col) = item.name {
                            if col.name.name.starts_with('@') && col.table.is_none() {
                                let var_name = col.name.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        let mut new_set = (**s).clone();
                        new_set.items = new_items;
                        return Ok(Expression::SetStatement(Box::new(new_set)));
                    }
                }
            }
2824
            // Strip NOLOCK hint for non-TSQL targets.
            // NOTE(review): this clears ALL table hints, not just NOLOCK — presumably
            // every TSQL table hint is unsupported elsewhere; confirm if other hints
            // should ever survive transpilation.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                if let Expression::Table(ref tr) = e {
                    if !tr.hints.is_empty() {
                        let mut new_tr = tr.clone();
                        new_tr.hints.clear();
                        return Ok(Expression::Table(new_tr));
                    }
                }
            }

            // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
            // Snowflake simplifies IS TRUE/IS FALSE on boolean literals.
            // This constant-folding is sound because the operand is a literal (never
            // NULL), so IS [NOT] TRUE/FALSE reduces to plain boolean algebra.
            if matches!(target, DialectType::Snowflake) {
                if let Expression::IsTrue(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS TRUE == b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
                        } else {
                            // b IS NOT TRUE == NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
                        }
                    }
                }
                if let Expression::IsFalse(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS FALSE == NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
                        } else {
                            // b IS NOT FALSE == b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
                        }
                    }
                }
            }
2860
            // BigQuery: split dotted backtick identifiers in table names
            // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery.
            // BigQuery allows the whole project.dataset.table path inside a single pair
            // of backticks, which parses as one Identifier containing dots; other
            // dialects need it split into catalog/schema/name parts.
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::CreateTable(ref ct) = e {
                    let mut changed = false;
                    let mut new_ct = ct.clone();
                    // Split the table name (only when no schema was parsed separately,
                    // i.e. the dots live inside the single name identifier)
                    if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
                        let parts: Vec<&str> = ct.name.name.name.split('.').collect();
                        // Use quoted identifiers when the original was quoted (backtick in BigQuery)
                        let was_quoted = ct.name.name.quoted;
                        let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
                        if parts.len() == 3 {
                            new_ct.name.catalog = Some(mk_id(parts[0]));
                            new_ct.name.schema = Some(mk_id(parts[1]));
                            new_ct.name.name = mk_id(parts[2]);
                            changed = true;
                        } else if parts.len() == 2 {
                            new_ct.name.schema = Some(mk_id(parts[0]));
                            new_ct.name.name = mk_id(parts[1]);
                            changed = true;
                        }
                    }
                    // Split the clone source name (CREATE TABLE ... CLONE src) the same way
                    if let Some(ref clone_src) = ct.clone_source {
                        if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                            let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                            let was_quoted = clone_src.name.quoted;
                            let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
                            let mut new_src = clone_src.clone();
                            if parts.len() == 3 {
                                new_src.catalog = Some(mk_id(parts[0]));
                                new_src.schema = Some(mk_id(parts[1]));
                                new_src.name = mk_id(parts[2]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            } else if parts.len() == 2 {
                                new_src.schema = Some(mk_id(parts[0]));
                                new_src.name = mk_id(parts[1]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        return Ok(Expression::CreateTable(new_ct));
                    }
                }
            }
2910
            // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
            // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto.
            // `new_index` is the converted (1-based) index expression; `is_safe` marks
            // the SAFE_* variants that must not error on out-of-bounds access.
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena)
            {
                if let Expression::Subscript(ref sub) = e {
                    let (new_index, is_safe) = match &sub.index {
                        // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
                        Expression::Literal(Literal::Number(n)) => {
                            if let Ok(val) = n.parse::<i64>() {
                                (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
                            } else {
                                // non-integer literal (e.g. float): leave the subscript untouched
                                (None, false)
                            }
                        }
                        // OFFSET(n) -> n+1 (0-based); non-literal args become `n + 1` at runtime
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 => {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
                                } else {
                                    (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
                                }
                            } else {
                                (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
                            }
                        }
                        // ORDINAL(n) -> n (already 1-based)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 => {
                            (Some(f.args[0].clone()), false)
                        }
                        // SAFE_OFFSET(n) -> n+1 (0-based, safe)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 => {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), true)
                                } else {
                                    (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
                                }
                            } else {
                                (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
                            }
                        }
                        // SAFE_ORDINAL(n) -> n (already 1-based, safe)
                        Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 => {
                            (Some(f.args[0].clone()), true)
                        }
                        _ => (None, false),
                    };
                    if let Some(idx) = new_index {
                        if is_safe && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                            // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
                            // (ELEMENT_AT returns NULL out of range, matching SAFE_* semantics)
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ELEMENT_AT".to_string(), vec![sub.this.clone(), idx],
                            ))));
                        } else {
                            // DuckDB or non-safe: just use subscript with converted index
                            return Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
                                this: sub.this.clone(),
                                index: idx,
                            })));
                        }
                    }
                }
            }
2976
            // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
            // BigQuery's LENGTH is polymorphic over STRING and BYTES; DuckDB needs a
            // runtime type dispatch: byte count for blobs, character count otherwise.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Length(ref uf) = e {
                    let arg = uf.this.clone();
                    let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
                    // BLOB branch: OCTET_LENGTH(CAST(x AS VARBINARY))
                    let blob_cast = Expression::Cast(Box::new(Cast {
                        this: arg.clone(),
                        to: DataType::VarBinary { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
                    // ELSE branch: LENGTH(CAST(x AS TEXT))
                    let text_cast = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Text,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc { this: text_cast, original_name: None }));
                    return Ok(Expression::Case(Box::new(Case {
                        operand: Some(typeof_func),
                        whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
                        else_: Some(length_text),
                    })));
                }
            }
3007
            // BigQuery UNNEST alias handling (only for non-BigQuery sources):
            // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
            // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
            // In BigQuery the alias on UNNEST names the element, not the derived table,
            // so a table alias with column aliases must be collapsed.
            if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
                if let Expression::Alias(ref a) = e {
                    if matches!(&a.this, Expression::Unnest(_)) {
                        if a.column_aliases.is_empty() {
                            // Drop the entire alias, return just the UNNEST expression
                            return Ok(a.this.clone());
                        } else {
                            // Use first column alias as the main alias.
                            // NOTE(review): additional column aliases beyond the first
                            // are discarded — confirm multi-column UNNEST is out of scope.
                            let mut new_alias = a.as_ref().clone();
                            new_alias.alias = a.column_aliases[0].clone();
                            new_alias.column_aliases.clear();
                            return Ok(Expression::Alias(Box::new(new_alias)));
                        }
                    }
                }
            }

            // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::In(ref in_expr) = e {
                    if let Some(ref unnest_inner) = in_expr.unnest {
                        // Build the function call for the target dialect
                        let func_expr = if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            // Use EXPLODE for Hive/Spark
                            Expression::Function(Box::new(Function::new("EXPLODE".to_string(), vec![*unnest_inner.clone()])))
                        } else {
                            // Use UNNEST for Presto/Trino/DuckDB/etc.
                            Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                this: *unnest_inner.clone(),
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            }))
                        };

                        // Wrap in SELECT so the membership test becomes `IN (subquery)`
                        let mut inner_select = crate::expressions::Select::new();
                        inner_select.expressions = vec![func_expr];

                        let subquery_expr = Expression::Select(Box::new(inner_select));

                        // Preserve NOT/GLOBAL flags; the unnest slot is consumed.
                        return Ok(Expression::In(Box::new(crate::expressions::In {
                            this: in_expr.this.clone(),
                            expressions: Vec::new(),
                            query: Some(subquery_expr),
                            not: in_expr.not,
                            global: in_expr.global,
                            unnest: None,
                        })));
                    }
                }
            }
3064
            // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
            // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context.
            // SQLite's generate_series table-valued function exposes its result in a
            // fixed column named `value`, so the column alias must be applied via a
            // projection rather than a column-alias list.
            if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
                if let Expression::Alias(ref a) = e {
                    if let Expression::Function(ref f) = a.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && !a.column_aliases.is_empty() {
                            // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                            let col_alias = a.column_aliases[0].clone();
                            let mut inner_select = crate::expressions::Select::new();
                            inner_select.expressions = vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                                Expression::Identifier(Identifier::new("value".to_string())),
                                col_alias,
                            )))];
                            inner_select.from = Some(crate::expressions::From {
                                expressions: vec![a.this.clone()],
                            });
                            let subquery = Expression::Subquery(Box::new(crate::expressions::Subquery {
                                this: Expression::Select(Box::new(inner_select)),
                                alias: Some(a.alias.clone()),
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                            }));
                            return Ok(subquery);
                        }
                    }
                }
            }
3100
3101 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
3102 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
3103 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
3104 if matches!(source, DialectType::BigQuery) {
3105 if let Expression::Select(ref s) = e {
3106 if let Some(ref from) = s.from {
3107 if from.expressions.len() >= 2 {
3108 // Collect table names from first expression
3109 let first_tables: Vec<String> = from.expressions.iter().take(1).filter_map(|expr| {
3110 if let Expression::Table(t) = expr {
3111 Some(t.name.name.to_lowercase())
3112 } else {
3113 None
3114 }
3115 }).collect();
3116
3117 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
3118 // or have a dotted name matching a table
3119 let mut needs_rewrite = false;
3120 for expr in from.expressions.iter().skip(1) {
3121 if let Expression::Table(t) = expr {
3122 if let Some(ref schema) = t.schema {
3123 if first_tables.contains(&schema.name.to_lowercase()) {
3124 needs_rewrite = true;
3125 break;
3126 }
3127 }
3128 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
3129 if t.schema.is_none() && t.name.name.contains('.') {
3130 let parts: Vec<&str> = t.name.name.split('.').collect();
3131 if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
3132 needs_rewrite = true;
3133 break;
3134 }
3135 }
3136 }
3137 }
3138
3139 if needs_rewrite {
3140 let mut new_select = s.clone();
3141 let mut new_from_exprs = vec![from.expressions[0].clone()];
3142 let mut new_joins = s.joins.clone();
3143
3144 for expr in from.expressions.iter().skip(1) {
3145 if let Expression::Table(ref t) = expr {
3146 if let Some(ref schema) = t.schema {
3147 if first_tables.contains(&schema.name.to_lowercase()) {
3148 // This is an array path reference, convert to CROSS JOIN UNNEST
3149 let col_expr = Expression::Column(crate::expressions::Column {
3150 name: t.name.clone(),
3151 table: Some(schema.clone()),
3152 join_mark: false,
3153 trailing_comments: vec![],
3154 });
3155 let unnest_expr = Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
3156 this: col_expr,
3157 expressions: Vec::new(),
3158 with_ordinality: false,
3159 alias: None,
3160 offset_alias: None,
3161 }));
3162 let join_this = if let Some(ref alias) = t.alias {
3163 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
3164 // Presto: UNNEST(x) AS _t0(results)
3165 Expression::Alias(Box::new(crate::expressions::Alias {
3166 this: unnest_expr,
3167 alias: Identifier::new("_t0"),
3168 column_aliases: vec![alias.clone()],
3169 pre_alias_comments: vec![],
3170 trailing_comments: vec![],
3171 }))
3172 } else {
3173 // BigQuery: UNNEST(x) AS results
3174 Expression::Alias(Box::new(crate::expressions::Alias {
3175 this: unnest_expr,
3176 alias: alias.clone(),
3177 column_aliases: vec![],
3178 pre_alias_comments: vec![],
3179 trailing_comments: vec![],
3180 }))
3181 }
3182 } else {
3183 unnest_expr
3184 };
3185 new_joins.push(crate::expressions::Join {
3186 kind: crate::expressions::JoinKind::Cross,
3187 this: join_this,
3188 on: None,
3189 using: Vec::new(),
3190 use_inner_keyword: false,
3191 use_outer_keyword: false,
3192 deferred_condition: false,
3193 join_hint: None,
3194 match_condition: None,
3195 pivots: Vec::new(),
3196 });
3197 } else {
3198 new_from_exprs.push(expr.clone());
3199 }
3200 } else if t.schema.is_none() && t.name.name.contains('.') {
3201 // Dotted name in quoted identifier: `Coordinates.position`
3202 let parts: Vec<&str> = t.name.name.split('.').collect();
3203 if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
3204 let join_this = if matches!(target, DialectType::BigQuery) {
3205 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
3206 Expression::Table(t.clone())
3207 } else {
3208 // Other targets: split into "schema"."name"
3209 let mut new_t = t.clone();
3210 new_t.schema = Some(Identifier::quoted(parts[0]));
3211 new_t.name = Identifier::quoted(parts[1]);
3212 Expression::Table(new_t)
3213 };
3214 new_joins.push(crate::expressions::Join {
3215 kind: crate::expressions::JoinKind::Cross,
3216 this: join_this,
3217 on: None,
3218 using: Vec::new(),
3219 use_inner_keyword: false,
3220 use_outer_keyword: false,
3221 deferred_condition: false,
3222 join_hint: None,
3223 match_condition: None,
3224 pivots: Vec::new(),
3225 });
3226 } else {
3227 new_from_exprs.push(expr.clone());
3228 }
3229 } else {
3230 new_from_exprs.push(expr.clone());
3231 }
3232 } else {
3233 new_from_exprs.push(expr.clone());
3234 }
3235 }
3236
3237 new_select.from = Some(crate::expressions::From {
3238 expressions: new_from_exprs,
3239 ..from.clone()
3240 });
3241 new_select.joins = new_joins;
3242 return Ok(Expression::Select(new_select));
3243 }
3244 }
3245 }
3246 }
3247 }
3248
3249 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
3250 if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
3251 if let Expression::Select(ref s) = e {
3252 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
3253 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
3254 matches!(expr, Expression::Unnest(_))
3255 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
3256 };
3257 let has_unnest_join = s.joins.iter().any(|j| {
3258 j.kind == crate::expressions::JoinKind::Cross && (
3259 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
3260 || is_unnest_or_explode_expr(&j.this)
3261 )
3262 });
3263 if has_unnest_join {
3264 let mut select = s.clone();
3265 let mut new_joins = Vec::new();
3266 for join in select.joins.drain(..) {
3267 if join.kind == crate::expressions::JoinKind::Cross {
3268 // Extract the UNNEST/EXPLODE from the join
3269 let (func_expr, table_alias, col_aliases) = match &join.this {
3270 Expression::Alias(a) => {
3271 let ta = if a.alias.is_empty() { None } else { Some(a.alias.clone()) };
3272 let cas = a.column_aliases.clone();
3273 match &a.this {
3274 Expression::Unnest(u) => {
3275 // Convert UNNEST(x) to EXPLODE(x)
3276 let explode = Expression::Function(Box::new(crate::expressions::Function::new(
3277 "EXPLODE".to_string(),
3278 vec![u.this.clone()],
3279 )));
3280 (Some(explode), ta, cas)
3281 }
3282 Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE") => {
3283 (Some(Expression::Function(f.clone())), ta, cas)
3284 }
3285 _ => (None, None, Vec::new())
3286 }
3287 }
3288 Expression::Unnest(u) => {
3289 let explode = Expression::Function(Box::new(crate::expressions::Function::new(
3290 "EXPLODE".to_string(),
3291 vec![u.this.clone()],
3292 )));
3293 let ta = u.alias.clone();
3294 (Some(explode), ta, Vec::new())
3295 }
3296 _ => (None, None, Vec::new())
3297 };
3298 if let Some(func) = func_expr {
3299 select.lateral_views.push(crate::expressions::LateralView {
3300 this: func,
3301 table_alias,
3302 column_aliases: col_aliases,
3303 outer: false,
3304 });
3305 } else {
3306 new_joins.push(join);
3307 }
3308 } else {
3309 new_joins.push(join);
3310 }
3311 }
3312 select.joins = new_joins;
3313 return Ok(Expression::Select(select));
3314 }
3315 }
3316 }
3317
3318 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
3319 // for BigQuery, Presto/Trino, Snowflake
3320 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
3321 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino | DialectType::Snowflake)
3322 {
3323 if let Expression::Select(ref s) = e {
3324 // Check if any SELECT expressions contain UNNEST
3325 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
3326 let has_unnest_in_select = s.expressions.iter().any(|expr| {
3327 fn contains_unnest(e: &Expression) -> bool {
3328 match e {
3329 Expression::Unnest(_) => true,
3330 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => true,
3331 Expression::Alias(a) => contains_unnest(&a.this),
3332 Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
3333 contains_unnest(&op.left) || contains_unnest(&op.right)
3334 }
3335 _ => false,
3336 }
3337 }
3338 contains_unnest(expr)
3339 });
3340
3341 if has_unnest_in_select {
3342 let rewritten = Self::rewrite_unnest_expansion(s, target);
3343 if let Some(new_select) = rewritten {
3344 return Ok(Expression::Select(Box::new(new_select)));
3345 }
3346 }
3347 }
3348 }
3349
3350 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
3351 // BigQuery '\n' -> PostgreSQL literal newline in string
3352 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL) {
3353 if let Expression::Literal(Literal::String(ref s)) = e {
3354 if s.contains("\\n") || s.contains("\\t") || s.contains("\\r") || s.contains("\\\\") {
3355 let converted = s
3356 .replace("\\n", "\n")
3357 .replace("\\t", "\t")
3358 .replace("\\r", "\r")
3359 .replace("\\\\", "\\");
3360 return Ok(Expression::Literal(Literal::String(converted)));
3361 }
3362 }
3363 }
3364
3365 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
3366 // when source != target (identity tests keep the Literal::Timestamp for native handling)
3367 if source != target {
3368 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
3369 let s = s.clone();
3370 // MySQL: TIMESTAMP handling depends on source dialect
3371 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
3372 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
3373 if matches!(target, DialectType::MySQL) {
3374 if matches!(source, DialectType::BigQuery) {
3375 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
3376 return Ok(Expression::Function(Box::new(Function::new(
3377 "TIMESTAMP".to_string(), vec![Expression::Literal(Literal::String(s))],
3378 ))));
3379 } else {
3380 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
3381 return Ok(Expression::Cast(Box::new(Cast {
3382 this: Expression::Literal(Literal::String(s)),
3383 to: DataType::Custom { name: "DATETIME".to_string() },
3384 trailing_comments: Vec::new(),
3385 double_colon_syntax: false,
3386 format: None,
3387 default: None,
3388 })));
3389 }
3390 }
3391 let dt = match target {
3392 DialectType::BigQuery | DialectType::StarRocks => {
3393 DataType::Custom { name: "DATETIME".to_string() }
3394 }
3395 DialectType::Snowflake => {
3396 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
3397 if matches!(source, DialectType::BigQuery) {
3398 DataType::Custom { name: "TIMESTAMPTZ".to_string() }
3399 } else if matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake) {
3400 DataType::Timestamp { precision: None, timezone: false }
3401 } else {
3402 DataType::Custom { name: "TIMESTAMPNTZ".to_string() }
3403 }
3404 }
3405 DialectType::Spark | DialectType::Databricks => {
3406 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
3407 if matches!(source, DialectType::BigQuery) {
3408 DataType::Timestamp { precision: None, timezone: false }
3409 } else {
3410 DataType::Custom { name: "TIMESTAMP_NTZ".to_string() }
3411 }
3412 }
3413 DialectType::ClickHouse => {
3414 DataType::Custom { name: "Nullable(DateTime)".to_string() }
3415 }
3416 DialectType::TSQL | DialectType::Fabric => {
3417 DataType::Custom { name: "DATETIME2".to_string() }
3418 }
3419 DialectType::DuckDB => {
3420 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
3421 // or when the timestamp string explicitly has timezone info
3422 if matches!(source, DialectType::BigQuery) || Self::timestamp_string_has_timezone(&s) {
3423 DataType::Custom { name: "TIMESTAMPTZ".to_string() }
3424 } else {
3425 DataType::Timestamp { precision: None, timezone: false }
3426 }
3427 }
3428 _ => {
3429 DataType::Timestamp { precision: None, timezone: false }
3430 }
3431 };
3432 return Ok(Expression::Cast(Box::new(Cast {
3433 this: Expression::Literal(Literal::String(s)),
3434 to: dt,
3435 trailing_comments: vec![],
3436 double_colon_syntax: false,
3437 format: None,
3438 default: None,
3439 })));
3440 }
3441 }
3442
3443 // PostgreSQL DELETE requires explicit AS for table aliases
3444 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
3445 if let Expression::Delete(ref del) = e {
3446 if del.alias.is_some() && !del.alias_explicit_as {
3447 let mut new_del = del.clone();
3448 new_del.alias_explicit_as = true;
3449 return Ok(Expression::Delete(new_del));
3450 }
3451 }
3452 }
3453
3454 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
3455 if matches!(target, DialectType::DuckDB) {
3456 if let Expression::CreateDatabase(db) = e {
3457 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
3458 schema.if_not_exists = db.if_not_exists;
3459 return Ok(Expression::CreateSchema(Box::new(schema)));
3460 }
3461 if let Expression::DropDatabase(db) = e {
3462 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
3463 schema.if_exists = db.if_exists;
3464 return Ok(Expression::DropSchema(Box::new(schema)));
3465 }
3466 }
3467
3468 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
3469 if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) {
3470 if let Expression::Cast(ref c) = e {
3471 if let DataType::Custom { ref name } = c.to {
3472 let upper = name.to_uppercase();
3473 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
3474 let inner = &name[9..name.len()-1]; // strip "Nullable(" and ")"
3475 let inner_upper = inner.to_uppercase();
3476 let new_dt = match inner_upper.as_str() {
3477 "DATETIME" | "DATETIME64" => DataType::Timestamp { precision: None, timezone: false },
3478 "DATE" => DataType::Date,
3479 "INT64" | "BIGINT" => DataType::BigInt { length: None },
3480 "INT32" | "INT" | "INTEGER" => DataType::Int { length: None, integer_spelling: false },
3481 "FLOAT64" | "DOUBLE" => DataType::Double { precision: None, scale: None },
3482 "STRING" => DataType::Text,
3483 _ => DataType::Custom { name: inner.to_string() },
3484 };
3485 let mut new_cast = c.clone();
3486 new_cast.to = new_dt;
3487 return Ok(Expression::Cast(new_cast));
3488 }
3489 }
3490 }
3491 }
3492
3493 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
3494 if matches!(target, DialectType::Snowflake) {
3495 if let Expression::ArrayConcatAgg(ref agg) = e {
3496 let mut agg_clone = agg.as_ref().clone();
3497 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
3498 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
3499 let flatten = Expression::Function(Box::new(Function::new(
3500 "ARRAY_FLATTEN".to_string(), vec![array_agg],
3501 )));
3502 return Ok(flatten);
3503 }
3504 }
3505
3506 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
3507 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
3508 if let Expression::ArrayConcatAgg(agg) = e {
3509 let arg = agg.this;
3510 return Ok(Expression::Function(Box::new(Function::new(
3511 "ARRAY_CONCAT_AGG".to_string(), vec![arg],
3512 ))));
3513 }
3514 }
3515
3516 // Determine what action to take by inspecting e immutably
3517 let action = {
3518 let source_propagates_nulls = matches!(source, DialectType::Snowflake | DialectType::BigQuery);
3519 let target_ignores_nulls = matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
3520
3521 match &e {
3522 Expression::Function(f) => {
3523 let name = f.name.to_uppercase();
3524 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
3525 if (name == "DATE_PART" || name == "DATEPART")
3526 && f.args.len() == 2
3527 && matches!(target, DialectType::Snowflake)
3528 && !matches!(source, DialectType::Snowflake)
3529 && matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_)))
3530 {
3531 Action::DatePartUnquote
3532 } else if source_propagates_nulls && target_ignores_nulls
3533 && (name == "GREATEST" || name == "LEAST") && f.args.len() >= 2 {
3534 Action::GreatestLeastNull
3535 } else if matches!(source, DialectType::Snowflake)
3536 && name == "ARRAY_GENERATE_RANGE" && f.args.len() >= 2 {
3537 Action::ArrayGenerateRange
3538 } else if matches!(source, DialectType::Snowflake)
3539 && matches!(target, DialectType::DuckDB)
3540 && name == "DATE_TRUNC" && f.args.len() == 2 {
3541 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
3542 // Logic based on Python sqlglot's input_type_preserved flag:
3543 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
3544 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
3545 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
3546 let unit_str = match &f.args[0] {
3547 Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.to_uppercase()),
3548 _ => None,
3549 };
3550 let is_date_unit = unit_str.as_ref().map_or(false, |u| matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY"));
3551 match &f.args[1] {
3552 Expression::Cast(c) => match &c.to {
3553 DataType::Time { .. } => Action::DateTruncWrapCast,
3554 DataType::Custom { name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") || name.eq_ignore_ascii_case("TIMESTAMPLTZ") => Action::DateTruncWrapCast,
3555 DataType::Timestamp { timezone: true, .. } => Action::DateTruncWrapCast,
3556 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
3557 DataType::Timestamp { timezone: false, .. } if is_date_unit => Action::DateTruncWrapCast,
3558 _ => Action::None,
3559 }
3560 _ => Action::None,
3561 }
3562 } else if matches!(source, DialectType::Snowflake)
3563 && matches!(target, DialectType::DuckDB)
3564 && name == "TO_DATE" && f.args.len() == 1
3565 && !matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_))) {
3566 Action::ToDateToCast
3567 } else if !matches!(source, DialectType::Redshift)
3568 && matches!(target, DialectType::Redshift)
3569 && name == "CONVERT_TIMEZONE"
3570 && (f.args.len() == 2 || f.args.len() == 3) {
3571 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
3572 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
3573 // The Redshift parser adds 'UTC' as default source_tz, but when
3574 // transpiling from other dialects, we should preserve the original form.
3575 Action::ConvertTimezoneToExpr
3576 } else if matches!(source, DialectType::Snowflake)
3577 && matches!(target, DialectType::DuckDB)
3578 && name == "REGEXP_REPLACE"
3579 && f.args.len() == 4
3580 && !matches!(&f.args[3], Expression::Literal(crate::expressions::Literal::String(_))) {
3581 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
3582 Action::RegexpReplaceSnowflakeToDuckDB
3583 } else if name == "_BQ_TO_HEX" {
3584 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
3585 Action::BigQueryToHexBare
3586 } else if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
3587 // BigQuery-specific functions that need to be converted to standard forms
3588 match name.as_str() {
3589 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3590 | "DATE_DIFF"
3591 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
3592 | "DATETIME_ADD" | "DATETIME_SUB"
3593 | "TIME_ADD" | "TIME_SUB"
3594 | "DATE_ADD" | "DATE_SUB"
3595 | "SAFE_DIVIDE"
3596 | "GENERATE_UUID"
3597 | "COUNTIF"
3598 | "EDIT_DISTANCE"
3599 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
3600 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
3601 | "TO_HEX"
3602 | "TO_JSON_STRING"
3603 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
3604 | "DIV"
3605 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
3606 | "LAST_DAY"
3607 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
3608 | "REGEXP_CONTAINS"
3609 | "CONTAINS_SUBSTR"
3610 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
3611 | "SAFE_CAST"
3612 | "GENERATE_DATE_ARRAY"
3613 | "PARSE_DATE" | "PARSE_TIMESTAMP"
3614 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
3615 | "ARRAY_CONCAT"
3616 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
3617 | "INSTR"
3618 | "MD5" | "SHA1" | "SHA256" | "SHA512"
3619 | "GENERATE_UUID()" // just in case
3620 | "REGEXP_EXTRACT_ALL"
3621 | "REGEXP_EXTRACT"
3622 | "INT64"
3623 | "ARRAY_CONCAT_AGG"
3624 | "DATE_DIFF(" // just in case
3625 | "TO_HEX_MD5" // internal
3626 | "MOD"
3627 | "CONCAT"
3628 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
3629 | "STRUCT"
3630 | "ROUND"
3631 | "MAKE_INTERVAL"
3632 | "ARRAY_TO_STRING"
3633 | "PERCENTILE_CONT"
3634 => Action::BigQueryFunctionNormalize,
3635 "ARRAY" if matches!(target, DialectType::Snowflake)
3636 && f.args.len() == 1
3637 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
3638 => Action::BigQueryArraySelectAsStructToSnowflake,
3639 _ => Action::None,
3640 }
3641 } else if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::BigQuery) {
3642 // BigQuery -> BigQuery normalizations
3643 match name.as_str() {
3644 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3645 | "DATE_DIFF"
3646 | "DATE_ADD"
3647 | "TO_HEX"
3648 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_TIME" | "CURRENT_DATETIME"
3649 | "GENERATE_DATE_ARRAY"
3650 | "INSTR"
3651 | "FORMAT_DATETIME"
3652 | "DATETIME"
3653 | "MAKE_INTERVAL"
3654 => Action::BigQueryFunctionNormalize,
3655 _ => Action::None,
3656 }
3657 } else {
3658 // Generic function normalization for non-BigQuery sources
3659 match name.as_str() {
3660 "ARBITRARY" | "AGGREGATE"
3661 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
3662 | "STRUCT_EXTRACT"
3663 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
3664 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
3665 | "SUBSTRINGINDEX"
3666 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
3667 | "UNICODE"
3668 | "XOR"
3669 | "ARRAY_REVERSE_SORT"
3670 | "ENCODE" | "DECODE"
3671 | "QUANTILE"
3672 | "EPOCH" | "EPOCH_MS"
3673 | "HASHBYTES"
3674 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
3675 | "APPROX_DISTINCT"
3676 | "DATE_PARSE" | "FORMAT_DATETIME"
3677 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
3678 | "RLIKE"
3679 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
3680 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
3681 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
3682 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
3683 | "MAP" | "MAP_FROM_ENTRIES"
3684 | "COLLECT_LIST" | "COLLECT_SET"
3685 | "ISNAN" | "IS_NAN"
3686 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
3687 | "FORMAT_NUMBER"
3688 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
3689 | "ELEMENT_AT"
3690 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
3691 | "SPLIT_PART"
3692 // GENERATE_SERIES: handled separately below
3693 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
3694 | "JSON_QUERY" | "JSON_VALUE"
3695 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
3696 | "ARRAY_SUM"
3697 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
3698 | "CURDATE" | "CURTIME"
3699 | "ARRAY_TO_STRING"
3700 | "ARRAY_SORT" | "SORT_ARRAY"
3701 | "LEFT" | "RIGHT"
3702 | "MAP_FROM_ARRAYS"
3703 | "LIKE" | "ILIKE"
3704 | "ARRAY_CONCAT"
3705 | "QUANTILE_CONT" | "QUANTILE_DISC"
3706 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
3707 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
3708 | "LOCATE" | "STRPOS" | "INSTR"
3709 | "CHAR"
3710 // CONCAT: handled separately for COALESCE wrapping
3711 | "ARRAY_JOIN"
3712 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
3713 | "ISNULL"
3714 | "MONTHNAME"
3715 | "TO_TIMESTAMP"
3716 | "TO_DATE"
3717 | "TO_JSON"
3718 | "STR_TO_DATE"
3719 | "REGEXP_SPLIT"
3720 | "SPLIT"
3721 | "FORMATDATETIME"
3722 | "ARRAYJOIN"
3723 | "SPLITBYSTRING" | "SPLITBYREGEXP"
3724 | "NVL"
3725 | "TO_CHAR"
3726 | "DBMS_RANDOM.VALUE"
3727 | "REGEXP_LIKE"
3728 | "REPLICATE"
3729 | "LEN"
3730 | "COUNT_BIG"
3731 | "DATEFROMPARTS"
3732 | "DATETIMEFROMPARTS"
3733 | "CONVERT" | "TRY_CONVERT"
3734 | "STRFTIME" | "STRPTIME"
3735 | "DATE_FORMAT" | "FORMAT_DATE"
3736 | "PARSE_TIMESTAMP" | "PARSE_DATE"
3737 | "FROM_BASE64" | "TO_BASE64"
3738 | "GETDATE"
3739 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
3740 | "TO_UTF8" | "FROM_UTF8"
3741 | "STARTS_WITH" | "STARTSWITH"
3742 | "APPROX_COUNT_DISTINCT"
3743 | "JSON_FORMAT"
3744 | "SYSDATE"
3745 | "LOGICAL_OR" | "LOGICAL_AND"
3746 | "MONTHS_ADD"
3747 | "SCHEMA_NAME"
3748 | "STRTOL"
3749 | "EDITDIST3"
3750 | "FORMAT"
3751 | "LIST_CONTAINS" | "LIST_HAS"
3752 | "VARIANCE" | "STDDEV"
3753 | "ISINF"
3754 | "TO_UNIXTIME"
3755 | "FROM_UNIXTIME"
3756 | "DATEPART" | "DATE_PART"
3757 | "DATENAME"
3758 | "STRING_AGG"
3759 | "JSON_ARRAYAGG"
3760 | "APPROX_QUANTILE"
3761 | "MAKE_DATE"
3762 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
3763 | "RANGE"
3764 | "TRY_ELEMENT_AT"
3765 | "STR_TO_MAP"
3766 | "STRING"
3767 | "TIME_TO_STR"
3768 => Action::GenericFunctionNormalize,
3769 // Functions needing specific cross-dialect transforms
3770 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3771 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
3772 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
3773 "ARRAY" if matches!(source, DialectType::BigQuery)
3774 && matches!(target, DialectType::Snowflake)
3775 && f.args.len() == 1
3776 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
3777 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
3778 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
3779 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
3780 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
3781 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3782 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
3783 // GENERATE_SERIES with interval normalization for PG target
3784 "GENERATE_SERIES" if f.args.len() >= 3
3785 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3786 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
3787 "GENERATE_SERIES" => Action::None, // passthrough for other cases
3788 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
3789 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3790 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
3791 "CONCAT" => Action::GenericFunctionNormalize,
3792 // DIV(a, b) -> target-specific integer division
3793 "DIV" if f.args.len() == 2
3794 && matches!(source, DialectType::PostgreSQL)
3795 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
3796 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3797 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
3798 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3799 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3800 "JSONB_EXISTS" if f.args.len() == 2
3801 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
3802 // DATE_BIN -> TIME_BUCKET for DuckDB
3803 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
3804 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
3805 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
3806 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
3807 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
3808 // ClickHouse any -> ANY_VALUE for other dialects
3809 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
3810 _ => Action::None,
3811 }
3812 }
3813 }
3814 Expression::AggregateFunction(af) => {
3815 let name = af.name.to_uppercase();
3816 match name.as_str() {
3817 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
3818 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
3819 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3820 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3821 "ARRAY_AGG" if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => Action::ArrayAggToCollectList,
3822 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3823 "COLLECT_LIST" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB) => Action::CollectListToArrayAgg,
3824 "COLLECT_SET" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake | DialectType::DuckDB) => Action::CollectSetConvert,
3825 "PERCENTILE" if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) => Action::PercentileConvert,
3826 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
3827 "CORR" if matches!(target, DialectType::DuckDB) && matches!(source, DialectType::Snowflake) => Action::CorrIsnanWrap,
3828 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3829 "APPROX_QUANTILES" if matches!(source, DialectType::BigQuery)
3830 && matches!(target, DialectType::DuckDB) => Action::BigQueryApproxQuantiles,
3831 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3832 "PERCENTILE_CONT" if matches!(source, DialectType::BigQuery)
3833 && matches!(target, DialectType::DuckDB)
3834 && af.args.len() >= 2 => Action::BigQueryPercentileContToDuckDB,
3835 _ => Action::None,
3836 }
3837 }
3838 Expression::JSONArrayAgg(_) => {
3839 match target {
3840 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
3841 _ => Action::None,
3842 }
3843 }
3844 Expression::ToNumber(tn) => {
3845 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
3846 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
3847 match target {
3848 DialectType::Oracle | DialectType::Snowflake | DialectType::Teradata => Action::None,
3849 _ => Action::GenericFunctionNormalize,
3850 }
3851 } else {
3852 Action::None
3853 }
3854 }
3855 Expression::IfFunc(if_func) => {
3856 if matches!(source, DialectType::Snowflake)
3857 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::SQLite)
3858 && matches!(if_func.false_value, Some(Expression::Div(_))) {
3859 Action::Div0TypedDivision
3860 } else {
3861 Action::None
3862 }
3863 }
3864 Expression::ToJson(_) => {
3865 match target {
3866 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
3867 DialectType::BigQuery => Action::ToJsonConvert,
3868 DialectType::DuckDB => Action::ToJsonConvert,
3869 _ => Action::None,
3870 }
3871 }
3872 Expression::ArrayAgg(ref agg) => {
3873 if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
3874 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
3875 Action::ArrayAggToCollectList
3876 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3877 && matches!(target, DialectType::DuckDB)
3878 && agg.filter.is_some() {
3879 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
3880 // Need to add NOT x IS NULL to existing filter
3881 Action::ArrayAggNullFilter
3882 } else if matches!(target, DialectType::DuckDB)
3883 && agg.ignore_nulls == Some(true)
3884 && !agg.order_by.is_empty() {
3885 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
3886 Action::ArrayAggIgnoreNullsDuckDB
3887 } else if !matches!(source, DialectType::Snowflake) {
3888 Action::None
3889 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3890 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase()) == Some("ARRAY_AGG".to_string())
3891 || agg.name.is_none();
3892 if is_array_agg {
3893 Action::ArrayAggCollectList
3894 } else {
3895 Action::None
3896 }
3897 } else if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) && agg.filter.is_none() {
3898 Action::ArrayAggFilter
3899 } else {
3900 Action::None
3901 }
3902 }
3903 Expression::WithinGroup(wg) => {
3904 if matches!(source, DialectType::Snowflake)
3905 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
3906 && matches!(wg.this, Expression::ArrayAgg(_)) {
3907 Action::ArrayAggWithinGroupFilter
3908 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
3909 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
3910 || matches!(&wg.this, Expression::StringAgg(_)) {
3911 Action::StringAggConvert
3912 } else if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
3913 | DialectType::Spark | DialectType::Databricks)
3914 && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3915 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3916 || matches!(&wg.this, Expression::PercentileCont(_))) {
3917 Action::PercentileContConvert
3918 } else {
3919 Action::None
3920 }
3921 }
3922 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
3923 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
3924 // DATETIME is the timezone-unaware type
3925 Expression::Cast(ref c) => {
3926 if c.format.is_some() && (matches!(source, DialectType::BigQuery) || matches!(source, DialectType::Teradata)) {
3927 Action::BigQueryCastFormat
3928 } else if matches!(target, DialectType::BigQuery)
3929 && !matches!(source, DialectType::BigQuery)
3930 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3931 {
3932 Action::CastTimestampToDatetime
3933 } else if matches!(source,
3934 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3935 ) && matches!(target,
3936 DialectType::Presto | DialectType::Trino | DialectType::Athena
3937 | DialectType::DuckDB | DialectType::Snowflake | DialectType::BigQuery
3938 | DialectType::Databricks | DialectType::TSQL
3939 ) {
3940 Action::HiveCastToTryCast
3941 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
3942 && matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::BigQuery) {
3943 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
3944 Action::CastTimestampStripTz
3945 } else if matches!(&c.to, DataType::Json)
3946 && matches!(&c.this, Expression::Literal(Literal::String(_)))
3947 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::Snowflake) {
3948 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3949 // Only when the input is a string literal (JSON 'value' syntax)
3950 Action::JsonLiteralToJsonParse
3951 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
3952 && matches!(target, DialectType::Spark | DialectType::Databricks) {
3953 // CAST(x AS JSON) -> TO_JSON(x) for Spark
3954 Action::CastToJsonForSpark
3955 } else if (matches!(&c.to, DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }))
3956 && matches!(target, DialectType::Spark | DialectType::Databricks)
3957 && (
3958 matches!(&c.this, Expression::ParseJson(_))
3959 || matches!(
3960 &c.this,
3961 Expression::Function(f)
3962 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
3963 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
3964 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
3965 )
3966 ) {
3967 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
3968 // -> FROM_JSON(..., type_string) for Spark
3969 Action::CastJsonToFromJson
3970 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
3971 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3972 && matches!(source, DialectType::DuckDB) {
3973 Action::StrftimeCastTimestamp
3974 } else if matches!(source, DialectType::DuckDB)
3975 && matches!(c.to, DataType::Decimal { precision: None, .. }) {
3976 Action::DecimalDefaultPrecision
3977 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
3978 && matches!(c.to, DataType::Char { length: None })
3979 && !matches!(target, DialectType::MySQL | DialectType::SingleStore) {
3980 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
3981 Action::MysqlCastCharToText
3982 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3983 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3984 && Self::has_varchar_char_type(&c.to) {
3985 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
3986 Action::SparkCastVarcharToString
3987 } else {
3988 Action::None
3989 }
3990 }
3991 Expression::SafeCast(ref c) => {
3992 if c.format.is_some() && matches!(source, DialectType::BigQuery)
3993 && !matches!(target, DialectType::BigQuery)
3994 {
3995 Action::BigQueryCastFormat
3996 } else {
3997 Action::None
3998 }
3999 }
4000 // For DuckDB: DATE_TRUNC should preserve the input type
4001 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
4002 if matches!(source, DialectType::Snowflake) && matches!(target, DialectType::DuckDB) {
4003 Action::DateTruncWrapCast
4004 } else {
4005 Action::None
4006 }
4007 }
4008 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
4009 Expression::SetStatement(s) => {
4010 if matches!(target, DialectType::DuckDB)
4011 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
4012 && s.items.iter().any(|item| item.kind.is_none()) {
4013 Action::SetToVariable
4014 } else {
4015 Action::None
4016 }
4017 }
4018 // Cross-dialect NULL ordering normalization.
4019 // When nulls_first is not specified, fill in the source dialect's implied
4020 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
4021 Expression::Ordered(o) => {
4022 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
4023 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
4024 Action::MysqlNullsOrdering
4025 } else {
4026 // Skip targets that don't support NULLS FIRST/LAST syntax
4027 let target_supports_nulls = !matches!(target,
4028 DialectType::MySQL | DialectType::TSQL
4029 | DialectType::StarRocks | DialectType::Doris
4030 );
4031 if o.nulls_first.is_none() && source != target && target_supports_nulls {
4032 Action::NullsOrdering
4033 } else {
4034 Action::None
4035 }
4036 }
4037 }
4038 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
4039 Expression::DataType(dt) => {
4040 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4041 match dt {
4042 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") || name.eq_ignore_ascii_case("FLOAT64") || name.eq_ignore_ascii_case("BOOL") || name.eq_ignore_ascii_case("BYTES") || name.eq_ignore_ascii_case("NUMERIC") || name.eq_ignore_ascii_case("STRING") || name.eq_ignore_ascii_case("DATETIME") => Action::BigQueryCastType,
4043 _ => Action::None,
4044 }
4045 } else if matches!(source, DialectType::TSQL) {
4046 // For TSQL source -> any target (including TSQL itself for REAL)
4047 match dt {
4048 // REAL -> FLOAT even for TSQL->TSQL
4049 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL")
4050 => Action::TSQLTypeNormalize,
4051 DataType::Float { real_spelling: true, .. }
4052 => Action::TSQLTypeNormalize,
4053 // Other TSQL type normalizations only for non-TSQL targets
4054 DataType::Custom { ref name } if !matches!(target, DialectType::TSQL) && (
4055 name.eq_ignore_ascii_case("MONEY")
4056 || name.eq_ignore_ascii_case("SMALLMONEY")
4057 || name.eq_ignore_ascii_case("DATETIME2")
4058 || name.eq_ignore_ascii_case("IMAGE")
4059 || name.eq_ignore_ascii_case("BIT")
4060 || name.eq_ignore_ascii_case("ROWVERSION")
4061 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
4062 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
4063 || name.to_uppercase().starts_with("NUMERIC")
4064 || name.to_uppercase().starts_with("DATETIME2(")
4065 || name.to_uppercase().starts_with("TIME(")
4066 ) => Action::TSQLTypeNormalize,
4067 DataType::Float { precision: Some(_), .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
4068 DataType::TinyInt { .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
4069 // INTEGER -> INT for Databricks/Spark targets
4070 DataType::Int { integer_spelling: true, .. } if matches!(target, DialectType::Databricks | DialectType::Spark) => Action::TSQLTypeNormalize,
4071 _ => Action::None,
4072 }
4073 } else if matches!(source, DialectType::Oracle) && !matches!(target, DialectType::Oracle) {
4074 match dt {
4075 DataType::Custom { ref name } if name.to_uppercase().starts_with("VARCHAR2(") || name.to_uppercase().starts_with("NVARCHAR2(") || name.eq_ignore_ascii_case("VARCHAR2") || name.eq_ignore_ascii_case("NVARCHAR2") => Action::OracleVarchar2ToVarchar,
4076 _ => Action::None,
4077 }
4078 } else if matches!(target, DialectType::Snowflake) && !matches!(source, DialectType::Snowflake) {
4079 // When target is Snowflake but source is NOT Snowflake,
4080 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
4081 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
4082 // should keep their FLOAT spelling.
4083 match dt {
4084 DataType::Float { .. } => Action::SnowflakeFloatProtect,
4085 _ => Action::None,
4086 }
4087 } else {
4088 Action::None
4089 }
4090 }
4091 // LOWER patterns from BigQuery TO_HEX conversions:
4092 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
4093 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
4094 Expression::Lower(uf) => {
4095 if matches!(source, DialectType::BigQuery) {
4096 match &uf.this {
4097 Expression::Lower(_) => Action::BigQueryToHexLower,
4098 Expression::Function(f) if f.name == "TO_HEX" && matches!(target, DialectType::BigQuery) => {
4099 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
4100 Action::BigQueryToHexLower
4101 }
4102 _ => Action::None,
4103 }
4104 } else {
4105 Action::None
4106 }
4107 }
4108 // UPPER patterns from BigQuery TO_HEX conversions:
4109 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
4110 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
4111 Expression::Upper(uf) => {
4112 if matches!(source, DialectType::BigQuery) {
4113 match &uf.this {
4114 Expression::Lower(_) => Action::BigQueryToHexUpper,
4115 _ => Action::None,
4116 }
4117 } else {
4118 Action::None
4119 }
4120 }
4121 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
4122 // Snowflake supports LAST_DAY with unit, so keep it there
4123 Expression::LastDay(ld) => {
4124 if matches!(source, DialectType::BigQuery)
4125 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
4126 && ld.unit.is_some()
4127 {
4128 Action::BigQueryLastDayStripUnit
4129 } else {
4130 Action::None
4131 }
4132 }
4133 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
4134 Expression::SafeDivide(_) => {
4135 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4136 Action::BigQuerySafeDivide
4137 } else {
4138 Action::None
4139 }
4140 }
4141 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
4142 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
4143 Expression::AnyValue(ref agg) => {
4144 if matches!(source, DialectType::BigQuery)
4145 && matches!(target, DialectType::DuckDB)
4146 && agg.having_max.is_some()
4147 {
4148 Action::BigQueryAnyValueHaving
4149 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
4150 && !matches!(source, DialectType::Spark | DialectType::Databricks)
4151 && agg.ignore_nulls.is_none()
4152 {
4153 Action::AnyValueIgnoreNulls
4154 } else {
4155 Action::None
4156 }
4157 }
4158 Expression::Any(ref q) => {
4159 if matches!(source, DialectType::PostgreSQL)
4160 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
4161 && q.op.is_some()
4162 && !matches!(q.subquery, Expression::Select(_) | Expression::Subquery(_))
4163 {
4164 Action::AnyToExists
4165 } else {
4166 Action::None
4167 }
4168 }
4169 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
4170 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
4171 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
4172 Expression::RegexpLike(_) if !matches!(source, DialectType::DuckDB)
4173 && matches!(target, DialectType::DuckDB) => {
4174 Action::RegexpLikeToDuckDB
4175 }
4176 // MySQL division -> NULLIF wrapping and/or CAST for specific targets
4177 Expression::Div(ref op) if matches!(source, DialectType::MySQL)
4178 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift
4179 | DialectType::Drill | DialectType::Trino | DialectType::Presto
4180 | DialectType::TSQL | DialectType::Teradata | DialectType::SQLite
4181 | DialectType::BigQuery | DialectType::Snowflake | DialectType::Databricks
4182 | DialectType::Oracle) => {
4183 // Only wrap if RHS is not already NULLIF
4184 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF")) {
4185 Action::MySQLSafeDivide
4186 } else {
4187 Action::None
4188 }
4189 }
4190 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
4191 // For TSQL/Fabric, convert to sp_rename instead
4192 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
4193 if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
4194 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
4195 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
4196 Action::AlterTableToSpRename
4197 } else if new_tbl.schema.is_some()
4198 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
4199 | DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift) {
4200 Action::AlterTableRenameStripSchema
4201 } else {
4202 Action::None
4203 }
4204 } else {
4205 Action::None
4206 }
4207 }
4208 // EPOCH(x) expression -> target-specific epoch conversion
4209 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
4210 Action::EpochConvert
4211 }
4212 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
4213 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
4214 Action::EpochMsConvert
4215 }
4216 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
4217 Expression::StringAgg(_) => {
4218 if matches!(target, DialectType::MySQL | DialectType::SingleStore
4219 | DialectType::Doris | DialectType::StarRocks | DialectType::SQLite)
4220 {
4221 Action::StringAggConvert
4222 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4223 Action::StringAggConvert
4224 } else {
4225 Action::None
4226 }
4227 }
4228 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
4229 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
4230 Expression::GroupConcat(_) => {
4231 Action::GroupConcatConvert
4232 }
4233 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
4234 Expression::Cardinality(_) | Expression::ArrayLength(_) | Expression::ArraySize(_) => {
4235 Action::ArrayLengthConvert
4236 }
4237 // NVL: clear original_name so generator uses dialect-specific function names
4238 Expression::Nvl(f) if f.original_name.is_some() => {
4239 Action::NvlClearOriginal
4240 }
4241 // XOR: expand for dialects that don't support the XOR keyword
4242 Expression::Xor(_) => {
4243 let target_supports_xor = matches!(target,
4244 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris
4245 | DialectType::StarRocks
4246 );
4247 if !target_supports_xor {
4248 Action::XorExpand
4249 } else {
4250 Action::None
4251 }
4252 }
4253 // TSQL #table -> temp table normalization (CREATE TABLE)
4254 Expression::CreateTable(ct)
4255 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4256 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4257 && ct.name.name.name.starts_with('#') => {
4258 Action::TempTableHash
4259 }
4260 // TSQL #table -> strip # from table references in SELECT/etc.
4261 Expression::Table(tr)
4262 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4263 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4264 && tr.name.name.starts_with('#') => {
4265 Action::TempTableHash
4266 }
4267 // TSQL #table -> strip # from DROP TABLE names
4268 Expression::DropTable(ref dt)
4269 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4270 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4271 && dt.names.iter().any(|n| n.name.name.starts_with('#')) => {
4272 Action::TempTableHash
4273 }
4274 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
4275 Expression::JsonExtract(ref f) if !f.arrow_syntax && matches!(target, DialectType::SQLite | DialectType::DuckDB) => {
4276 Action::JsonExtractToArrow
4277 }
4278 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
4279 Expression::JsonExtract(ref f) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
4280 && !matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Materialize)
4281 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with("$.")) => {
4282 Action::JsonExtractToGetJsonObject
4283 }
4284 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
4285 Expression::JsonExtract(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4286 Action::JsonExtractToGetJsonObject
4287 }
4288 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4289 Expression::JsonExtractScalar(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4290 Action::JsonExtractScalarToGetJsonObject
4291 }
4292 // JsonQuery (parsed JSON_QUERY) -> target-specific
4293 Expression::JsonQuery(_) => {
4294 Action::JsonQueryValueConvert
4295 }
4296 // JsonValue (parsed JSON_VALUE) -> target-specific
4297 Expression::JsonValue(_) => {
4298 Action::JsonQueryValueConvert
4299 }
4300 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
4301 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
4302 Expression::AtTimeZone(_) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
4303 | DialectType::Spark | DialectType::Databricks
4304 | DialectType::BigQuery | DialectType::Snowflake) => {
4305 Action::AtTimeZoneConvert
4306 }
4307 // DAY_OF_WEEK -> dialect-specific
4308 Expression::DayOfWeek(_) if matches!(target, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks) => {
4309 Action::DayOfWeekConvert
4310 }
4311 // CURRENT_USER -> CURRENT_USER() for Snowflake
4312 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
4313 Action::CurrentUserParens
4314 }
4315 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4316 Expression::ElementAt(_) if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) => {
4317 Action::ElementAtConvert
4318 }
4319 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
4320 Expression::ArrayFunc(ref arr) if !arr.bracket_notation
4321 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::ClickHouse | DialectType::StarRocks) => {
4322 Action::ArraySyntaxConvert
4323 }
4324 // VARIANCE expression -> varSamp for ClickHouse
4325 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
4326 Action::VarianceToClickHouse
4327 }
4328 // STDDEV expression -> stddevSamp for ClickHouse
4329 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
4330 Action::StddevToClickHouse
4331 }
4332 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4333 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
4334 Action::ApproxQuantileConvert
4335 }
4336 // MonthsBetween -> target-specific
4337 Expression::MonthsBetween(_) if !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4338 Action::MonthsBetweenConvert
4339 }
4340 // AddMonths -> target-specific DATEADD/DATE_ADD
4341 Expression::AddMonths(_) => {
4342 Action::AddMonthsConvert
4343 }
4344 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
4345 Expression::MapFromArrays(_) if !matches!(target, DialectType::Spark | DialectType::Databricks) => {
4346 Action::MapFromArraysConvert
4347 }
4348 // CURRENT_USER -> CURRENT_USER() for Spark
4349 Expression::CurrentUser(_) if matches!(target, DialectType::Spark | DialectType::Databricks) => {
4350 Action::CurrentUserSparkParens
4351 }
4352 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
4353 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
4354 if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
4355 && matches!(&f.this, Expression::Literal(Literal::String(_)))
4356 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::PostgreSQL | DialectType::Redshift) => {
4357 Action::SparkDateFuncCast
4358 }
4359 // $parameter -> @parameter for BigQuery
4360 Expression::Parameter(ref p) if matches!(target, DialectType::BigQuery)
4361 && matches!(source, DialectType::DuckDB)
4362 && (p.style == crate::expressions::ParameterStyle::Dollar || p.style == crate::expressions::ParameterStyle::DoubleDollar) => {
4363 Action::DollarParamConvert
4364 }
4365 // EscapeString literal: normalize literal newlines to \n
4366 Expression::Literal(Literal::EscapeString(ref s)) if s.contains('\n') || s.contains('\r') || s.contains('\t') => {
4367 Action::EscapeStringNormalize
4368 }
4369 // straight_join: keep lowercase for DuckDB, quote for MySQL
4370 Expression::Column(ref col) if col.name.name == "STRAIGHT_JOIN" && col.table.is_none()
4371 && matches!(source, DialectType::DuckDB)
4372 && matches!(target, DialectType::DuckDB | DialectType::MySQL) => {
4373 Action::StraightJoinCase
4374 }
4375 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
4376 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
4377 Expression::Interval(ref iv) if matches!(target, DialectType::Snowflake | DialectType::PostgreSQL | DialectType::Redshift)
4378 && iv.unit.is_some()
4379 && matches!(&iv.this, Some(Expression::Literal(Literal::String(_)))) => {
4380 Action::SnowflakeIntervalFormat
4381 }
4382 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
4383 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
4384 if let Some(ref sample) = ts.sample {
4385 if !sample.explicit_method {
4386 Action::TablesampleReservoir
4387 } else {
4388 Action::None
4389 }
4390 } else {
4391 Action::None
4392 }
4393 }
4394 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
4395 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
4396 Expression::TableSample(ref ts) if matches!(target, DialectType::Snowflake)
4397 && !matches!(source, DialectType::Snowflake)
4398 && ts.sample.is_some() => {
4399 if let Some(ref sample) = ts.sample {
4400 if !sample.explicit_method {
4401 Action::TablesampleSnowflakeStrip
4402 } else {
4403 Action::None
4404 }
4405 } else {
4406 Action::None
4407 }
4408 }
4409 Expression::Table(ref t) if matches!(target, DialectType::Snowflake)
4410 && !matches!(source, DialectType::Snowflake)
4411 && t.table_sample.is_some() => {
4412 if let Some(ref sample) = t.table_sample {
4413 if !sample.explicit_method {
4414 Action::TablesampleSnowflakeStrip
4415 } else {
4416 Action::None
4417 }
4418 } else {
4419 Action::None
4420 }
4421 }
4422 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4423 Expression::AlterTable(ref at) if matches!(target, DialectType::TSQL | DialectType::Fabric)
4424 && !at.actions.is_empty()
4425 && matches!(at.actions.first(), Some(crate::expressions::AlterTableAction::RenameTable(_))) => {
4426 Action::AlterTableToSpRename
4427 }
4428 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
4429 Expression::Subscript(ref sub) if matches!(target, DialectType::BigQuery | DialectType::Hive | DialectType::Spark | DialectType::Databricks)
4430 && matches!(source, DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Presto | DialectType::Trino | DialectType::Redshift | DialectType::ClickHouse)
4431 && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) => {
4432 Action::ArrayIndexConvert
4433 }
4434 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
4435 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
4436 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
4437 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
4438 Expression::WindowFunction(ref wf) => {
4439 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
4440 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
4441 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
4442 if matches!(target, DialectType::BigQuery)
4443 && !is_row_number
4444 && !wf.over.order_by.is_empty()
4445 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some()) {
4446 Action::BigQueryNullsOrdering
4447 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
4448 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
4449 } else {
4450 let source_nulls_last = matches!(source, DialectType::DuckDB);
4451 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
4452 matches!(f.kind, crate::expressions::WindowFrameKind::Range | crate::expressions::WindowFrameKind::Groups)
4453 });
4454 if source_nulls_last && matches!(target, DialectType::MySQL)
4455 && !wf.over.order_by.is_empty()
4456 && wf.over.order_by.iter().any(|o| !o.desc)
4457 && !has_range_frame {
4458 Action::MysqlNullsLastRewrite
4459 } else {
4460 match &wf.this {
4461 Expression::FirstValue(ref vf) | Expression::LastValue(ref vf) if vf.ignore_nulls == Some(false) => {
4462 // RESPECT NULLS
4463 match target {
4464 DialectType::SQLite => Action::RespectNullsConvert,
4465 _ => Action::None,
4466 }
4467 }
4468 _ => Action::None,
4469 }
4470 }
4471 }
4472 }
4473 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4474 Expression::CreateTable(ref ct) if matches!(target, DialectType::DuckDB)
4475 && matches!(source, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4476 let has_comment = ct.columns.iter().any(|c| c.comment.is_some()
4477 || c.constraints.iter().any(|con| matches!(con, crate::expressions::ColumnConstraint::Comment(_)))
4478 );
4479 let has_props = !ct.properties.is_empty();
4480 if has_comment || has_props {
4481 Action::CreateTableStripComment
4482 } else {
4483 Action::None
4484 }
4485 }
4486 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
4487 Expression::Array(_) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
4488 Action::ArrayConcatBracketConvert
4489 }
4490 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
4491 Expression::ArrayFunc(ref arr) if arr.bracket_notation
4492 && matches!(source, DialectType::BigQuery)
4493 && matches!(target, DialectType::Redshift) => {
4494 Action::ArrayConcatBracketConvert
4495 }
4496 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
4497 Expression::BitwiseOrAgg(ref f) | Expression::BitwiseAndAgg(ref f) | Expression::BitwiseXorAgg(ref f) => {
4498 if matches!(target, DialectType::DuckDB) {
4499 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
4500 if let Expression::Cast(ref c) = f.this {
4501 match &c.to {
4502 DataType::Float { .. } | DataType::Double { .. }
4503 | DataType::Decimal { .. } => Action::BitAggFloatCast,
4504 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL") => Action::BitAggFloatCast,
4505 _ => Action::None,
4506 }
4507 } else {
4508 Action::None
4509 }
4510 } else if matches!(target, DialectType::Snowflake) {
4511 Action::BitAggSnowflakeRename
4512 } else {
4513 Action::None
4514 }
4515 }
4516 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
4517 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
4518 Action::FilterToIff
4519 }
4520 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4521 Expression::Avg(ref f) | Expression::Sum(ref f) | Expression::Min(ref f)
4522 | Expression::Max(ref f)
4523 | Expression::CountIf(ref f) | Expression::Stddev(ref f)
4524 | Expression::StddevPop(ref f) | Expression::StddevSamp(ref f)
4525 | Expression::Variance(ref f) | Expression::VarPop(ref f)
4526 | Expression::VarSamp(ref f) | Expression::Median(ref f)
4527 | Expression::Mode(ref f) | Expression::First(ref f) | Expression::Last(ref f)
4528 | Expression::ApproxDistinct(ref f)
4529 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
4530 {
4531 Action::AggFilterToIff
4532 }
4533 Expression::Count(ref c) if c.filter.is_some() && matches!(target, DialectType::Snowflake) => {
4534 Action::AggFilterToIff
4535 }
4536 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
4537 Expression::Count(ref c) if c.distinct && matches!(&c.this, Some(Expression::Tuple(_)))
4538 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB | DialectType::PostgreSQL) => {
4539 Action::CountDistinctMultiArg
4540 }
4541 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4542 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
4543 Action::JsonToGetPath
4544 }
4545 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
4546 Expression::Struct(_) if matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4547 && matches!(source, DialectType::DuckDB) => {
4548 Action::StructToRow
4549 }
4550 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
4551 Expression::MapFunc(ref m) if m.curly_brace_syntax
4552 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4553 && matches!(source, DialectType::DuckDB) => {
4554 Action::StructToRow
4555 }
4556 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4557 Expression::ApproxCountDistinct(_)
4558 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4559 Action::ApproxCountDistinctToApproxDistinct
4560 }
4561 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
4562 Expression::ArrayContains(_)
4563 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake) => {
4564 Action::ArrayContainsConvert
4565 }
4566 // StrPosition with position -> complex expansion for Presto/DuckDB
4567 // STRPOS doesn't support a position arg in these dialects
4568 Expression::StrPosition(ref sp) if sp.position.is_some()
4569 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
4570 Action::StrPositionExpand
4571 }
4572 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4573 Expression::First(ref f) if f.ignore_nulls == Some(true)
4574 && matches!(target, DialectType::DuckDB) => {
4575 Action::FirstToAnyValue
4576 }
4577 // BEGIN -> START TRANSACTION for Presto/Trino
4578 Expression::Command(ref cmd) if cmd.this.eq_ignore_ascii_case("BEGIN")
4579 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4580 // Handled inline below
4581 Action::None // We'll handle it directly
4582 }
4583 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
4584 // PostgreSQL # is parsed as BitwiseXor (which is correct).
4585 // a || b (Concat operator) -> CONCAT function for Presto/Trino
4586 Expression::Concat(ref _op) if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
4587 && matches!(target, DialectType::Presto | DialectType::Trino) => {
4588 Action::PipeConcatToConcat
4589 }
4590 _ => Action::None,
4591 }
4592 };
4593
4594 match action {
4595 Action::None => {
4596 // Handle inline transforms that don't need a dedicated action
4597 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
4598 if let Expression::MethodCall(ref mc) = e {
4599 if matches!(source, DialectType::Oracle)
4600 && mc.method.name.eq_ignore_ascii_case("VALUE")
4601 && mc.args.is_empty()
4602 {
4603 let is_dbms_random = match &mc.this {
4604 Expression::Identifier(id) => id.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4605 Expression::Column(col) => col.table.is_none() && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4606 _ => false,
4607 };
4608 if is_dbms_random {
4609 let func_name = match target {
4610 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB | DialectType::SQLite => "RANDOM",
4611 DialectType::Oracle => "DBMS_RANDOM.VALUE",
4612 _ => "RAND",
4613 };
4614 return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![]))));
4615 }
4616 }
4617 }
4618 // TRIM without explicit position -> add BOTH for ClickHouse
4619 if let Expression::Trim(ref trim) = e {
4620 if matches!(target, DialectType::ClickHouse)
4621 && trim.sql_standard_syntax
4622 && trim.characters.is_some()
4623 && !trim.position_explicit
4624 {
4625 let mut new_trim = (**trim).clone();
4626 new_trim.position_explicit = true;
4627 return Ok(Expression::Trim(Box::new(new_trim)));
4628 }
4629 }
4630 // BEGIN -> START TRANSACTION for Presto/Trino
4631 if let Expression::Transaction(ref txn) = e {
4632 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4633 // Convert BEGIN to START TRANSACTION by setting mark to "START"
4634 let mut txn = txn.clone();
4635 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new("START".to_string()))));
4636 return Ok(Expression::Transaction(Box::new(*txn)));
4637 }
4638 }
4639 // IS TRUE/FALSE -> simplified forms for Presto/Trino
4640 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4641 match &e {
4642 Expression::IsTrue(itf) if !itf.not => {
4643 // x IS TRUE -> x
4644 return Ok(itf.this.clone());
4645 }
4646 Expression::IsTrue(itf) if itf.not => {
4647 // x IS NOT TRUE -> NOT x
4648 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4649 this: itf.this.clone(),
4650 })));
4651 }
4652 Expression::IsFalse(itf) if !itf.not => {
4653 // x IS FALSE -> NOT x
4654 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4655 this: itf.this.clone(),
4656 })));
4657 }
4658 Expression::IsFalse(itf) if itf.not => {
4659 // x IS NOT FALSE -> NOT NOT x
4660 let not_x = Expression::Not(Box::new(crate::expressions::UnaryOp {
4661 this: itf.this.clone(),
4662 }));
4663 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4664 this: not_x,
4665 })));
4666 }
4667 _ => {}
4668 }
4669 }
4670 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
4671 if matches!(target, DialectType::Redshift) {
4672 if let Expression::IsFalse(ref itf) = e {
4673 if itf.not {
4674 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4675 this: Expression::IsFalse(Box::new(crate::expressions::IsTrueFalse {
4676 this: itf.this.clone(),
4677 not: false,
4678 })),
4679 })));
4680 }
4681 }
4682 }
4683 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
4684 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
4685 if let Expression::Function(ref f) = e {
4686 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
4687 && matches!(source, DialectType::Snowflake)
4688 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
4689 {
4690 if f.args.len() == 3 {
4691 let mut args = f.args.clone();
4692 args.push(Expression::string("g"));
4693 return Ok(Expression::Function(Box::new(Function::new(
4694 "REGEXP_REPLACE".to_string(), args,
4695 ))));
4696 } else if f.args.len() == 4 {
4697 // 4th arg might be position, add 'g' as 5th
4698 let mut args = f.args.clone();
4699 args.push(Expression::string("g"));
4700 return Ok(Expression::Function(Box::new(Function::new(
4701 "REGEXP_REPLACE".to_string(), args,
4702 ))));
4703 }
4704 }
4705 }
4706 Ok(e)
4707 }
4708
4709 Action::GreatestLeastNull => {
4710 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4711 let mut null_checks: Vec<Expression> = f.args.iter().map(|a| {
4712 Expression::IsNull(Box::new(IsNull {
4713 this: a.clone(),
4714 not: false,
4715 postfix_form: false,
4716 }))
4717 }).collect();
4718 let condition = if null_checks.len() == 1 {
4719 null_checks.remove(0)
4720 } else {
4721 let first = null_checks.remove(0);
4722 null_checks.into_iter().fold(first, |acc, check| {
4723 Expression::Or(Box::new(BinaryOp::new(acc, check)))
4724 })
4725 };
4726 Ok(Expression::Case(Box::new(Case {
4727 operand: None,
4728 whens: vec![(condition, Expression::Null(Null))],
4729 else_: Some(Expression::Function(Box::new(Function::new(f.name, f.args)))),
4730 })))
4731 }
4732
4733 Action::ArrayGenerateRange => {
4734 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4735 let start = f.args[0].clone();
4736 let end = f.args[1].clone();
4737 let step = f.args.get(2).cloned();
4738
4739 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
4740 end.clone(),
4741 Expression::number(1),
4742 )));
4743
4744 match target {
4745 DialectType::PostgreSQL | DialectType::Redshift => {
4746 let mut args = vec![start, end_minus_1];
4747 if let Some(s) = step { args.push(s); }
4748 Ok(Expression::Function(Box::new(Function::new(
4749 "GENERATE_SERIES".to_string(), args,
4750 ))))
4751 }
4752 DialectType::Presto | DialectType::Trino => {
4753 let mut args = vec![start, end_minus_1];
4754 if let Some(s) = step { args.push(s); }
4755 Ok(Expression::Function(Box::new(Function::new(
4756 "SEQUENCE".to_string(), args,
4757 ))))
4758 }
4759 DialectType::BigQuery => {
4760 let mut args = vec![start, end_minus_1];
4761 if let Some(s) = step { args.push(s); }
4762 Ok(Expression::Function(Box::new(Function::new(
4763 "GENERATE_ARRAY".to_string(), args,
4764 ))))
4765 }
4766 DialectType::Snowflake => {
4767 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
4768 Expression::Paren(Box::new(Paren { this: end_minus_1, trailing_comments: vec![] })),
4769 Expression::number(1),
4770 )));
4771 let mut args = vec![start, normalized_end];
4772 if let Some(s) = step { args.push(s); }
4773 Ok(Expression::Function(Box::new(Function::new(
4774 "ARRAY_GENERATE_RANGE".to_string(), args,
4775 ))))
4776 }
4777 _ => {
4778 Ok(Expression::Function(Box::new(Function::new(f.name, f.args))))
4779 }
4780 }
4781 }
4782
4783 Action::Div0TypedDivision => {
4784 let if_func = if let Expression::IfFunc(f) = e { *f } else { unreachable!("action only triggered for IfFunc expressions") };
4785 if let Some(Expression::Div(div)) = if_func.false_value {
4786 let cast_type = if matches!(target, DialectType::SQLite) {
4787 DataType::Float { precision: None, scale: None, real_spelling: true }
4788 } else {
4789 DataType::Double { precision: None, scale: None }
4790 };
4791 let casted_left = Expression::Cast(Box::new(Cast {
4792 this: div.left,
4793 to: cast_type,
4794 trailing_comments: vec![],
4795 double_colon_syntax: false,
4796 format: None,
4797 default: None,
4798 }));
4799 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
4800 condition: if_func.condition,
4801 true_value: if_func.true_value,
4802 false_value: Some(Expression::Div(Box::new(BinaryOp::new(casted_left, div.right)))),
4803 original_name: if_func.original_name,
4804 })))
4805 } else {
4806 // Not actually a Div, reconstruct
4807 Ok(Expression::IfFunc(Box::new(if_func)))
4808 }
4809 }
4810
4811 Action::ArrayAggCollectList => {
4812 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4813 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4814 name: Some("COLLECT_LIST".to_string()),
4815 ..agg
4816 })))
4817 }
4818
4819 Action::ArrayAggWithinGroupFilter => {
4820 let wg = if let Expression::WithinGroup(w) = e { *w } else { unreachable!("action only triggered for WithinGroup expressions") };
4821 if let Expression::ArrayAgg(inner_agg) = wg.this {
4822 let col = inner_agg.this.clone();
4823 let filter = Expression::IsNull(Box::new(IsNull {
4824 this: col,
4825 not: true,
4826 postfix_form: false,
4827 }));
4828 // For DuckDB, add explicit NULLS FIRST for DESC ordering
4829 let order_by = if matches!(target, DialectType::DuckDB) {
4830 wg.order_by.into_iter().map(|mut o| {
4831 if o.desc && o.nulls_first.is_none() {
4832 o.nulls_first = Some(true);
4833 }
4834 o
4835 }).collect()
4836 } else {
4837 wg.order_by
4838 };
4839 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4840 this: inner_agg.this,
4841 distinct: inner_agg.distinct,
4842 filter: Some(filter),
4843 order_by,
4844 name: inner_agg.name,
4845 ignore_nulls: inner_agg.ignore_nulls,
4846 having_max: inner_agg.having_max,
4847 limit: inner_agg.limit,
4848 })))
4849 } else {
4850 Ok(Expression::WithinGroup(Box::new(wg)))
4851 }
4852 }
4853
4854 Action::ArrayAggFilter => {
4855 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4856 let col = agg.this.clone();
4857 let filter = Expression::IsNull(Box::new(IsNull {
4858 this: col,
4859 not: true,
4860 postfix_form: false,
4861 }));
4862 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4863 filter: Some(filter),
4864 ..agg
4865 })))
4866 }
4867
4868 Action::ArrayAggNullFilter => {
4869 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
4870 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
4871 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4872 let col = agg.this.clone();
4873 let not_null = Expression::IsNull(Box::new(IsNull {
4874 this: col,
4875 not: true,
4876 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
4877 }));
4878 let new_filter = if let Some(existing_filter) = agg.filter {
4879 // AND the NOT IS NULL with existing filter
4880 Expression::And(Box::new(crate::expressions::BinaryOp::new(
4881 existing_filter,
4882 not_null,
4883 )))
4884 } else {
4885 not_null
4886 };
4887 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4888 filter: Some(new_filter),
4889 ..agg
4890 })))
4891 }
4892
4893 Action::BigQueryArraySelectAsStructToSnowflake => {
4894 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
4895 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
4896 if let Expression::Function(mut f) = e {
4897 let is_match = f.args.len() == 1 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
4898 if is_match {
4899 let inner_select = match f.args.remove(0) {
4900 Expression::Select(s) => *s,
4901 _ => unreachable!("argument already verified to be a Select expression"),
4902 };
4903 // Build OBJECT_CONSTRUCT args from SELECT expressions
4904 let mut oc_args = Vec::new();
4905 for expr in &inner_select.expressions {
4906 match expr {
4907 Expression::Alias(a) => {
4908 let key = Expression::Literal(Literal::String(a.alias.name.clone()));
4909 let value = a.this.clone();
4910 oc_args.push(key);
4911 oc_args.push(value);
4912 }
4913 Expression::Column(c) => {
4914 let key = Expression::Literal(Literal::String(c.name.name.clone()));
4915 oc_args.push(key);
4916 oc_args.push(expr.clone());
4917 }
4918 _ => {
4919 oc_args.push(expr.clone());
4920 }
4921 }
4922 }
4923 let object_construct = Expression::Function(Box::new(Function::new(
4924 "OBJECT_CONSTRUCT".to_string(), oc_args,
4925 )));
4926 let array_agg = Expression::Function(Box::new(Function::new(
4927 "ARRAY_AGG".to_string(), vec![object_construct],
4928 )));
4929 let mut new_select = crate::expressions::Select::new();
4930 new_select.expressions = vec![array_agg];
4931 new_select.from = inner_select.from.clone();
4932 new_select.where_clause = inner_select.where_clause.clone();
4933 new_select.group_by = inner_select.group_by.clone();
4934 new_select.having = inner_select.having.clone();
4935 new_select.joins = inner_select.joins.clone();
4936 Ok(Expression::Subquery(Box::new(crate::expressions::Subquery {
4937 this: Expression::Select(Box::new(new_select)),
4938 alias: None,
4939 column_aliases: Vec::new(),
4940 order_by: None,
4941 limit: None,
4942 offset: None,
4943 distribute_by: None,
4944 sort_by: None,
4945 cluster_by: None,
4946 lateral: false,
4947 modifiers_inside: false,
4948 trailing_comments: Vec::new(),
4949 })))
4950 } else {
4951 Ok(Expression::Function(f))
4952 }
4953 } else {
4954 Ok(e)
4955 }
4956 }
4957
4958 Action::BigQueryPercentileContToDuckDB => {
4959 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
4960 if let Expression::AggregateFunction(mut af) = e {
4961 af.name = "QUANTILE_CONT".to_string();
4962 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
4963 // Keep only first 2 args
4964 if af.args.len() > 2 {
4965 af.args.truncate(2);
4966 }
4967 Ok(Expression::AggregateFunction(af))
4968 } else {
4969 Ok(e)
4970 }
4971 }
4972
4973 Action::ArrayAggIgnoreNullsDuckDB => {
4974 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
4975 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
4976 let mut agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4977 agg.ignore_nulls = None; // Strip IGNORE NULLS
4978 if !agg.order_by.is_empty() {
4979 agg.order_by[0].nulls_first = Some(true);
4980 }
4981 Ok(Expression::ArrayAgg(Box::new(agg)))
4982 }
4983
4984 Action::CountDistinctMultiArg => {
4985 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
4986 if let Expression::Count(c) = e {
4987 if let Some(Expression::Tuple(t)) = c.this {
4988 let args = t.expressions;
4989 // Build CASE expression:
4990 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
4991 let mut whens = Vec::new();
4992 for arg in &args {
4993 whens.push((
4994 Expression::IsNull(Box::new(IsNull {
4995 this: arg.clone(),
4996 not: false,
4997 postfix_form: false,
4998 })),
4999 Expression::Null(crate::expressions::Null),
5000 ));
5001 }
5002 // Build the tuple for ELSE
5003 let tuple_expr = Expression::Tuple(Box::new(crate::expressions::Tuple {
5004 expressions: args,
5005 }));
5006 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
5007 operand: None,
5008 whens,
5009 else_: Some(tuple_expr),
5010 }));
5011 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
5012 this: Some(case_expr),
5013 star: false,
5014 distinct: true,
5015 filter: c.filter,
5016 ignore_nulls: c.ignore_nulls,
5017 original_name: c.original_name,
5018 })))
5019 } else {
5020 Ok(Expression::Count(c))
5021 }
5022 } else {
5023 Ok(e)
5024 }
5025 }
5026
5027 Action::CastTimestampToDatetime => {
5028 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
5029 Ok(Expression::Cast(Box::new(Cast {
5030 to: DataType::Custom { name: "DATETIME".to_string() },
5031 ..c
5032 })))
5033 }
5034
5035 Action::CastTimestampStripTz => {
5036 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
5037 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
5038 Ok(Expression::Cast(Box::new(Cast {
5039 to: DataType::Timestamp { precision: None, timezone: false },
5040 ..c
5041 })))
5042 }
5043
5044 Action::ToDateToCast => {
5045 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
5046 if let Expression::Function(f) = e {
5047 let arg = f.args.into_iter().next().unwrap();
5048 Ok(Expression::Cast(Box::new(Cast {
5049 this: arg,
5050 to: DataType::Date,
5051 double_colon_syntax: false,
5052 trailing_comments: vec![],
5053 format: None,
5054 default: None,
5055 })))
5056 } else {
5057 Ok(e)
5058 }
5059 }
5060 Action::DateTruncWrapCast => {
5061 // Handle both Expression::DateTrunc/TimestampTrunc and
5062 // Expression::Function("DATE_TRUNC", [unit, expr])
5063 match e {
5064 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
5065 let input_type = match &d.this {
5066 Expression::Cast(c) => Some(c.to.clone()),
5067 _ => None,
5068 };
5069 if let Some(cast_type) = input_type {
5070 let is_time = matches!(cast_type, DataType::Time { .. });
5071 if is_time {
5072 let date_expr = Expression::Cast(Box::new(Cast {
5073 this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
5074 to: DataType::Date,
5075 double_colon_syntax: false,
5076 trailing_comments: vec![],
5077 format: None,
5078 default: None,
5079 }));
5080 let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
5081 let inner = Expression::DateTrunc(Box::new(DateTruncFunc { this: add_expr, unit: d.unit }));
5082 Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
5083 } else {
5084 let inner = Expression::DateTrunc(Box::new(*d));
5085 Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
5086 }
5087 } else {
5088 Ok(Expression::DateTrunc(d))
5089 }
5090 }
5091 Expression::Function(f) if f.args.len() == 2 => {
5092 // Function-based DATE_TRUNC(unit, expr)
5093 let input_type = match &f.args[1] {
5094 Expression::Cast(c) => Some(c.to.clone()),
5095 _ => None,
5096 };
5097 if let Some(cast_type) = input_type {
5098 let is_time = matches!(cast_type, DataType::Time { .. });
5099 if is_time {
5100 let date_expr = Expression::Cast(Box::new(Cast {
5101 this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
5102 to: DataType::Date,
5103 double_colon_syntax: false,
5104 trailing_comments: vec![],
5105 format: None,
5106 default: None,
5107 }));
5108 let mut args = f.args;
5109 let unit_arg = args.remove(0);
5110 let time_expr = args.remove(0);
5111 let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, time_expr)));
5112 let inner = Expression::Function(Box::new(Function::new(
5113 "DATE_TRUNC".to_string(),
5114 vec![unit_arg, add_expr],
5115 )));
5116 Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
5117 } else {
5118 // Wrap the function in CAST
5119 Ok(Expression::Cast(Box::new(Cast { this: Expression::Function(f), to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
5120 }
5121 } else {
5122 Ok(Expression::Function(f))
5123 }
5124 }
5125 other => Ok(other),
5126 }
5127 }
5128
5129 Action::RegexpReplaceSnowflakeToDuckDB => {
5130 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
5131 if let Expression::Function(f) = e {
5132 let mut args = f.args;
5133 let subject = args.remove(0);
5134 let pattern = args.remove(0);
5135 let replacement = args.remove(0);
5136 Ok(Expression::Function(Box::new(Function::new(
5137 "REGEXP_REPLACE".to_string(),
5138 vec![subject, pattern, replacement, Expression::Literal(crate::expressions::Literal::String("g".to_string()))],
5139 ))))
5140 } else {
5141 Ok(e)
5142 }
5143 }
5144
5145 Action::SetToVariable => {
5146 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
5147 if let Expression::SetStatement(mut s) = e {
5148 for item in &mut s.items {
5149 if item.kind.is_none() {
5150 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
5151 let already_variable = match &item.name {
5152 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
5153 _ => false,
5154 };
5155 if already_variable {
5156 // Extract the actual name and set kind
5157 if let Expression::Identifier(ref mut id) = item.name {
5158 let actual_name = id.name["VARIABLE ".len()..].to_string();
5159 id.name = actual_name;
5160 }
5161 }
5162 item.kind = Some("VARIABLE".to_string());
5163 }
5164 }
5165 Ok(Expression::SetStatement(s))
5166 } else {
5167 Ok(e)
5168 }
5169 }
5170
5171 Action::ConvertTimezoneToExpr => {
5172 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
5173 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
5174 if let Expression::Function(f) = e {
5175 if f.args.len() == 2 {
5176 let mut args = f.args;
5177 let target_tz = args.remove(0);
5178 let timestamp = args.remove(0);
5179 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
5180 source_tz: None,
5181 target_tz: Some(Box::new(target_tz)),
5182 timestamp: Some(Box::new(timestamp)),
5183 options: vec![],
5184 })))
5185 } else if f.args.len() == 3 {
5186 let mut args = f.args;
5187 let source_tz = args.remove(0);
5188 let target_tz = args.remove(0);
5189 let timestamp = args.remove(0);
5190 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
5191 source_tz: Some(Box::new(source_tz)),
5192 target_tz: Some(Box::new(target_tz)),
5193 timestamp: Some(Box::new(timestamp)),
5194 options: vec![],
5195 })))
5196 } else {
5197 Ok(Expression::Function(f))
5198 }
5199 } else {
5200 Ok(e)
5201 }
5202 }
5203
5204 Action::BigQueryCastType => {
5205 // Convert BigQuery types to standard SQL types
5206 if let Expression::DataType(dt) = e {
5207 match dt {
5208 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
5209 Ok(Expression::DataType(DataType::BigInt { length: None }))
5210 }
5211 DataType::Custom { ref name } if name.eq_ignore_ascii_case("FLOAT64") => {
5212 Ok(Expression::DataType(DataType::Double { precision: None, scale: None }))
5213 }
5214 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
5215 Ok(Expression::DataType(DataType::Boolean))
5216 }
5217 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
5218 Ok(Expression::DataType(DataType::VarBinary { length: None }))
5219 }
5220 DataType::Custom { ref name } if name.eq_ignore_ascii_case("NUMERIC") => {
5221 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
5222 // default precision (18, 3) being added to bare DECIMAL
5223 if matches!(target, DialectType::DuckDB) {
5224 Ok(Expression::DataType(DataType::Custom { name: "DECIMAL".to_string() }))
5225 } else {
5226 Ok(Expression::DataType(DataType::Decimal { precision: None, scale: None }))
5227 }
5228 }
5229 DataType::Custom { ref name } if name.eq_ignore_ascii_case("STRING") => {
5230 Ok(Expression::DataType(DataType::String { length: None }))
5231 }
5232 DataType::Custom { ref name } if name.eq_ignore_ascii_case("DATETIME") => {
5233 Ok(Expression::DataType(DataType::Timestamp { precision: None, timezone: false }))
5234 }
5235 _ => Ok(Expression::DataType(dt)),
5236 }
5237 } else {
5238 Ok(e)
5239 }
5240 }
5241
5242 Action::BigQuerySafeDivide => {
5243 // Convert SafeDivide expression to IF/CASE form for most targets
5244 if let Expression::SafeDivide(sd) = e {
5245 let x = *sd.this;
5246 let y = *sd.expression;
5247 // Wrap x and y in parens if they're complex expressions
5248 let y_ref = match &y {
5249 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
5250 _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
5251 };
5252 let x_ref = match &x {
5253 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
5254 _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
5255 };
5256 let condition = Expression::Neq(Box::new(BinaryOp::new(y_ref.clone(), Expression::number(0))));
5257 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
5258
5259 if matches!(target, DialectType::Presto | DialectType::Trino) {
5260 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
5261 let cast_x = Expression::Cast(Box::new(Cast {
5262 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5263 to: DataType::Double { precision: None, scale: None },
5264 trailing_comments: vec![],
5265 double_colon_syntax: false,
5266 format: None,
5267 default: None,
5268 }));
5269 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) })));
5270 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5271 condition,
5272 true_value: cast_div,
5273 false_value: Some(Expression::Null(Null)),
5274 original_name: None,
5275 })))
5276 } else if matches!(target, DialectType::PostgreSQL) {
5277 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
5278 let cast_x = Expression::Cast(Box::new(Cast {
5279 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5280 to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
5281 trailing_comments: vec![],
5282 double_colon_syntax: false,
5283 format: None,
5284 default: None,
5285 }));
5286 let y_paren = match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) };
5287 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
5288 Ok(Expression::Case(Box::new(Case {
5289 operand: None,
5290 whens: vec![(condition, cast_div)],
5291 else_: Some(Expression::Null(Null)),
5292 })))
5293 } else if matches!(target, DialectType::DuckDB) {
5294 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
5295 Ok(Expression::Case(Box::new(Case {
5296 operand: None,
5297 whens: vec![(condition, div_expr)],
5298 else_: Some(Expression::Null(Null)),
5299 })))
5300 } else if matches!(target, DialectType::Snowflake) {
5301 // Snowflake: IFF(y <> 0, x / y, NULL)
5302 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5303 condition,
5304 true_value: div_expr,
5305 false_value: Some(Expression::Null(Null)),
5306 original_name: Some("IFF".to_string()),
5307 })))
5308 } else {
5309 // All others: IF(y <> 0, x / y, NULL)
5310 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5311 condition,
5312 true_value: div_expr,
5313 false_value: Some(Expression::Null(Null)),
5314 original_name: None,
5315 })))
5316 }
5317 } else {
5318 Ok(e)
5319 }
5320 }
5321
5322 Action::BigQueryLastDayStripUnit => {
5323 if let Expression::LastDay(mut ld) = e {
5324 ld.unit = None; // Strip the unit (MONTH is default)
5325 match target {
5326 DialectType::PostgreSQL => {
5327 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
5328 let date_trunc = Expression::Function(Box::new(Function::new(
5329 "DATE_TRUNC".to_string(),
5330 vec![
5331 Expression::Literal(crate::expressions::Literal::String("MONTH".to_string())),
5332 ld.this.clone(),
5333 ],
5334 )));
5335 let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
5336 date_trunc,
5337 Expression::Interval(Box::new(crate::expressions::Interval {
5338 this: Some(Expression::Literal(crate::expressions::Literal::String("1 MONTH".to_string()))),
5339 unit: None,
5340 })),
5341 )));
5342 let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
5343 plus_month,
5344 Expression::Interval(Box::new(crate::expressions::Interval {
5345 this: Some(Expression::Literal(crate::expressions::Literal::String("1 DAY".to_string()))),
5346 unit: None,
5347 })),
5348 )));
5349 Ok(Expression::Cast(Box::new(Cast {
5350 this: minus_day,
5351 to: DataType::Date,
5352 trailing_comments: vec![],
5353 double_colon_syntax: false,
5354 format: None,
5355 default: None,
5356 })))
5357 }
5358 DialectType::Presto => {
5359 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
5360 Ok(Expression::Function(Box::new(Function::new(
5361 "LAST_DAY_OF_MONTH".to_string(),
5362 vec![ld.this],
5363 ))))
5364 }
5365 DialectType::ClickHouse => {
5366 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
5367 // Need to wrap the DATE type in Nullable
5368 let nullable_date = match ld.this {
5369 Expression::Cast(mut c) => {
5370 c.to = DataType::Custom { name: "Nullable(DATE)".to_string() };
5371 Expression::Cast(c)
5372 }
5373 other => other,
5374 };
5375 ld.this = nullable_date;
5376 Ok(Expression::LastDay(ld))
5377 }
5378 _ => Ok(Expression::LastDay(ld)),
5379 }
5380 } else {
5381 Ok(e)
5382 }
5383 }
5384
5385 Action::BigQueryCastFormat => {
5386 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
5387 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
5388 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
5389 let (this, to, format_expr, is_safe) = match e {
5390 Expression::Cast(ref c) if c.format.is_some() => {
5391 (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), false)
5392 }
5393 Expression::SafeCast(ref c) if c.format.is_some() => {
5394 (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), true)
5395 }
5396 _ => return Ok(e),
5397 };
5398 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
5399 if matches!(target, DialectType::BigQuery) {
5400 match &to {
5401 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
5402 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
5403 return Ok(e);
5404 }
5405 _ => {}
5406 }
5407 }
5408 // Extract timezone from format if AT TIME ZONE is present
5409 let (actual_format_expr, timezone) = match &format_expr {
5410 Expression::AtTimeZone(ref atz) => {
5411 (atz.this.clone(), Some(atz.zone.clone()))
5412 }
5413 _ => (format_expr.clone(), None),
5414 };
5415 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
5416 match target {
5417 DialectType::BigQuery => {
5418 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
5419 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
5420 let func_name = match &to {
5421 DataType::Date => "PARSE_DATE",
5422 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
5423 DataType::Time { .. } => "PARSE_TIMESTAMP",
5424 _ => "PARSE_TIMESTAMP",
5425 };
5426 let mut func_args = vec![strftime_fmt, this];
5427 if let Some(tz) = timezone {
5428 func_args.push(tz);
5429 }
5430 Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), func_args))))
5431 }
5432 DialectType::DuckDB => {
5433 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
5434 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
5435 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
5436 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
5437 let parse_call = Expression::Function(Box::new(Function::new(parse_fn_name.to_string(), vec![this, duck_fmt])));
5438 Ok(Expression::Cast(Box::new(Cast {
5439 this: parse_call,
5440 to,
5441 trailing_comments: vec![],
5442 double_colon_syntax: false,
5443 format: None,
5444 default: None,
5445 })))
5446 }
5447 _ => Ok(e),
5448 }
5449 }
5450
5451 Action::BigQueryFunctionNormalize => {
5452 Self::normalize_bigquery_function(e, source, target)
5453 }
5454
5455 Action::BigQueryToHexBare => {
5456 // Not used anymore - handled directly in normalize_bigquery_function
5457 Ok(e)
5458 }
5459
Action::BigQueryToHexLower => {
    // Simplify LOWER(...) wrappers around hex-conversion calls.
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            Expression::Function(f) if matches!(target, DialectType::BigQuery) && f.name == "TO_HEX" => {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: restore the inner call under the TO_HEX name
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args))))
                    } else {
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten the redundant wrapper: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            // Any other inner expression: re-wrap unchanged in LOWER.
            other => Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc { this: other, original_name: None })))
        }
    } else {
        Ok(e)
    }
}
5487
Action::BigQueryToHexUpper => {
    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER; HEX output is already uppercase)
    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
    if let Expression::Upper(uf) = e {
        if let Expression::Lower(inner_uf) = uf.this {
            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
            if matches!(target, DialectType::BigQuery) {
                // Restore the TO_HEX name in the inner function
                if let Expression::Function(f) = inner_uf.this {
                    let restored = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args)));
                    Ok(Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(restored))))
                } else {
                    Ok(Expression::Upper(inner_uf))
                }
            } else {
                // Drop both wrappers: UPPER(LOWER(x)) = x when x is uppercase hex
                Ok(inner_uf.this)
            }
        } else {
            // Not the LOWER(...) pattern; keep the UPPER wrapper untouched.
            Ok(Expression::Upper(uf))
        }
    } else {
        Ok(e)
    }
}
5513
Action::BigQueryAnyValueHaving => {
    // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
    // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
    if let Expression::AnyValue(agg) = e {
        // having_max carries (expr, is_max): the HAVING comparison target and
        // whether the clause was HAVING MAX (true) or HAVING MIN (false).
        if let Some((having_expr, is_max)) = agg.having_max {
            let func_name = if is_max { "ARG_MAX_NULL" } else { "ARG_MIN_NULL" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![agg.this, *having_expr],
            ))))
        } else {
            // Plain ANY_VALUE(x): leave it untouched.
            Ok(Expression::AnyValue(agg))
        }
    } else {
        Ok(e)
    }
}
5531
5532 Action::BigQueryApproxQuantiles => {
5533 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
5534 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
5535 if let Expression::AggregateFunction(agg) = e {
5536 if agg.args.len() >= 2 {
5537 let x_expr = agg.args[0].clone();
5538 let n_expr = &agg.args[1];
5539
5540 // Extract the numeric value from n_expr
5541 let n = match n_expr {
5542 Expression::Literal(crate::expressions::Literal::Number(s)) => s.parse::<usize>().unwrap_or(2),
5543 _ => 2,
5544 };
5545
5546 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
5547 let mut quantiles = Vec::new();
5548 for i in 0..=n {
5549 let q = i as f64 / n as f64;
5550 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
5551 if q == 0.0 {
5552 quantiles.push(Expression::number(0));
5553 } else if q == 1.0 {
5554 quantiles.push(Expression::number(1));
5555 } else {
5556 quantiles.push(Expression::Literal(crate::expressions::Literal::Number(format!("{}", q))));
5557 }
5558 }
5559
5560 let array_expr = Expression::Array(Box::new(crate::expressions::Array {
5561 expressions: quantiles,
5562 }));
5563
5564 // Preserve DISTINCT modifier
5565 let mut new_func = Function::new("APPROX_QUANTILE".to_string(), vec![x_expr, array_expr]);
5566 new_func.distinct = agg.distinct;
5567 Ok(Expression::Function(Box::new(new_func)))
5568 } else {
5569 Ok(Expression::AggregateFunction(agg))
5570 }
5571 } else {
5572 Ok(e)
5573 }
5574 }
5575
5576 Action::GenericFunctionNormalize => {
5577 // Helper closure to convert ARBITRARY to target-specific function
5578 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
5579 let name = match target {
5580 DialectType::ClickHouse => "any",
5581 DialectType::TSQL | DialectType::SQLite => "MAX",
5582 DialectType::Hive => "FIRST",
5583 DialectType::Presto | DialectType::Trino | DialectType::Athena => "ARBITRARY",
5584 _ => "ANY_VALUE",
5585 };
5586 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
5587 }
5588
5589 if let Expression::Function(f) = e {
5590 let name = f.name.to_uppercase();
5591 match name.as_str() {
5592 "ARBITRARY" if f.args.len() == 1 => {
5593 let arg = f.args.into_iter().next().unwrap();
5594 Ok(convert_arbitrary(arg, target))
5595 }
5596 "TO_NUMBER" if f.args.len() == 1 => {
5597 let arg = f.args.into_iter().next().unwrap();
5598 match target {
5599 DialectType::Oracle | DialectType::Snowflake => {
5600 Ok(Expression::Function(Box::new(Function::new("TO_NUMBER".to_string(), vec![arg]))))
5601 }
5602 _ => {
5603 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
5604 this: arg,
5605 to: crate::expressions::DataType::Double { precision: None, scale: None },
5606 double_colon_syntax: false,
5607 trailing_comments: Vec::new(),
5608 format: None,
5609 default: None,
5610 })))
5611 }
5612 }
5613 }
// AGGREGATE(arr, init, merge[, finish]) -> REDUCE for engines using that name.
"AGGREGATE" if f.args.len() >= 3 => {
    match target {
        DialectType::DuckDB | DialectType::Hive | DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(Box::new(Function::new("REDUCE".to_string(), f.args))))
        }
        // Other targets keep AGGREGATE unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
5622 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
5623 "REGEXP_MATCHES" if f.args.len() >= 2 => {
5624 if matches!(target, DialectType::DuckDB) {
5625 Ok(Expression::Function(f))
5626 } else {
5627 let mut args = f.args;
5628 let this = args.remove(0);
5629 let pattern = args.remove(0);
5630 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5631 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5632 this,
5633 pattern,
5634 flags,
5635 })))
5636 }
5637 }
5638 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
5639 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
5640 if matches!(target, DialectType::DuckDB) {
5641 Ok(Expression::Function(f))
5642 } else {
5643 let mut args = f.args;
5644 let this = args.remove(0);
5645 let pattern = args.remove(0);
5646 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5647 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5648 this,
5649 pattern,
5650 flags,
5651 })))
5652 }
5653 }
// STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
"STRUCT_EXTRACT" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let field_expr = args.remove(0);
    // The field must be a string literal or identifier; any other shape
    // (e.g. a computed expression) keeps the original function call.
    let field_name = match &field_expr {
        Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
        Expression::Identifier(id) => id.name.clone(),
        _ => return Ok(Expression::Function(Box::new(Function::new("STRUCT_EXTRACT".to_string(), vec![this, field_expr])))),
    };
    Ok(Expression::StructExtract(Box::new(crate::expressions::StructExtractFunc {
        this,
        field: crate::expressions::Identifier::new(field_name),
    })))
}
// LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
// DuckDB keeps its native name; everyone else uses FILTER.
"LIST_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1) outside DuckDB.
"LIST_TRANSFORM" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_TRANSFORM",
        _ => "TRANSFORM",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// LIST_SORT(x) -> ARRAY_SORT for DuckDB/Presto/Trino, SORT_ARRAY elsewhere.
"LIST_SORT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::DuckDB | DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
        _ => "SORT_ARRAY",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive,
// ARRAY_SORT(x, comparator lambda) for Presto/Trino/Athena.
"LIST_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        // DuckDB has a dedicated descending sort function.
        DialectType::DuckDB => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // SORT_ARRAY's second argument FALSE requests descending order.
            let mut args = f.args;
            args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
            Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // i.e. an inverted comparator to obtain descending order.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    crate::expressions::Identifier::new("a"),
                    crate::expressions::Identifier::new("b"),
                ],
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::Literal(Literal::Number("-1".to_string())),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                })),
                colon: false,
                parameter_types: Vec::new(),
            }));
            Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
        }
        // Other targets keep the DuckDB-style name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("LIST_REVERSE_SORT".to_string(), f.args)))),
    }
}
5739 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
5740 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
5741 let mut args = f.args;
5742 args.push(Expression::string(","));
5743 let name = match target {
5744 DialectType::DuckDB => "STR_SPLIT",
5745 DialectType::Presto | DialectType::Trino => "SPLIT",
5746 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
5747 DialectType::PostgreSQL => "STRING_TO_ARRAY",
5748 DialectType::Redshift => "SPLIT_TO_ARRAY",
5749 _ => "SPLIT",
5750 };
5751 Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
5752 }
5753 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
5754 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
5755 let name = match target {
5756 DialectType::DuckDB => "STR_SPLIT",
5757 DialectType::Presto | DialectType::Trino => "SPLIT",
5758 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
5759 DialectType::PostgreSQL => "STRING_TO_ARRAY",
5760 DialectType::Redshift => "SPLIT_TO_ARRAY",
5761 _ => "SPLIT",
5762 };
5763 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
5764 }
// STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
"STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
        DialectType::Doris | DialectType::StarRocks => "SPLIT_BY_STRING",
        DialectType::PostgreSQL | DialectType::Redshift => "STRING_TO_ARRAY",
        _ => "SPLIT",
    };
    // Spark/Hive SPLIT treats the separator as a regex, so a literal
    // separator must be quoted with \Q...\E before being handed over.
    if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
        let mut args = f.args;
        let x = args.remove(0);
        let sep = args.remove(0);
        // Wrap the separator in CONCAT('\\Q', sep, '\\E')
        let escaped_sep = Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![
                Expression::string("\\Q"),
                sep,
                Expression::string("\\E"),
            ],
        )));
        Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![x, escaped_sep]))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
    }
}
// STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
"STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT_REGEX",
        DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
        // Spark/Hive SPLIT is already regex-based, so no escaping is needed.
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
        _ => "REGEXP_SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// SPLIT(x, sep) from Presto/StarRocks/Doris -> Spark/Hive SPLIT with regex
// escaping: the source dialects split on a literal separator while the
// Hive/Spark function splits on a regex.
"SPLIT" if f.args.len() == 2
    && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena
        | DialectType::StarRocks | DialectType::Doris)
    && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    // Quote the literal separator with \Q...\E so regex metacharacters are inert.
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![
            Expression::string("\\Q"),
            sep,
            Expression::string("\\E"),
        ],
    )));
    Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![x, escaped_sep]))))
}
// SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard).
// For a ClickHouse target, preserve the original name to keep its camelCase.
"SUBSTRINGINDEX" => {
    let name = if matches!(target, DialectType::ClickHouse) {
        f.name.clone()
    } else {
        "SUBSTRING_INDEX".to_string()
    };
    Ok(Expression::Function(Box::new(Function::new(name, f.args))))
}
// ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
"ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
    // Take the array argument (first arg); remaining args, if present, are
    // dimension arguments that only DuckDB/PostgreSQL/Redshift keep.
    let mut args = f.args;
    let arr = if args.is_empty() {
        // Zero-arg call: nothing to rewrite; keep the (uppercased) name.
        // NOTE(review): this normalizes the original casing — confirm intended.
        return Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))));
    } else {
        args.remove(0)
    };
    let name = match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SIZE",
        DialectType::Presto | DialectType::Trino => "CARDINALITY",
        DialectType::BigQuery => "ARRAY_LENGTH",
        DialectType::DuckDB => {
            // DuckDB: use ARRAY_LENGTH and keep all arguments.
            let mut all_args = vec![arr];
            all_args.extend(args);
            return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // Keep ARRAY_LENGTH together with its dimension argument.
            let mut all_args = vec![arr];
            all_args.extend(args);
            return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
        }
        DialectType::ClickHouse => "LENGTH",
        _ => "ARRAY_LENGTH",
    };
    // All remaining targets take only the array argument.
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arr]))))
}
// UNICODE(x) -> target-specific codepoint function
"UNICODE" if f.args.len() == 1 => {
    match target {
        // SQLite/DuckDB support UNICODE natively.
        DialectType::SQLite | DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new("UNICODE".to_string(), f.args))))
        }
        DialectType::Oracle => {
            // ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new("UNISTR".to_string(), f.args)));
            Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), vec![inner]))))
        }
        DialectType::MySQL => {
            // ORD(CONVERT(x USING utf32)): convert to a fixed-width charset
            // first so ORD yields the full codepoint rather than the first byte.
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(crate::expressions::ConvertToCharset {
                this: Box::new(arg),
                dest: Some(Box::new(Expression::Identifier(crate::expressions::Identifier::new("utf32")))),
                source: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("ORD".to_string(), vec![convert_expr]))))
        }
        _ => {
            // Fall back to ASCII(x) elsewhere.
            Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), f.args))))
        }
    }
}
5889 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
5890 "XOR" if f.args.len() >= 2 => {
5891 match target {
5892 DialectType::ClickHouse => {
5893 // ClickHouse: keep as xor() function with lowercase name
5894 Ok(Expression::Function(Box::new(Function::new("xor".to_string(), f.args))))
5895 }
5896 DialectType::Presto | DialectType::Trino => {
5897 if f.args.len() == 2 {
5898 Ok(Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), f.args))))
5899 } else {
5900 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
5901 let mut args = f.args;
5902 let first = args.remove(0);
5903 let second = args.remove(0);
5904 let mut result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![first, second])));
5905 for arg in args {
5906 result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![result, arg])));
5907 }
5908 Ok(result)
5909 }
5910 }
5911 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
5912 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
5913 let args = f.args;
5914 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
5915 this: None,
5916 expression: None,
5917 expressions: args,
5918 })))
5919 }
5920 DialectType::PostgreSQL | DialectType::Redshift => {
5921 // PostgreSQL: a # b (hash operator for XOR)
5922 let mut args = f.args;
5923 let first = args.remove(0);
5924 let second = args.remove(0);
5925 let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
5926 for arg in args {
5927 result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
5928 }
5929 Ok(result)
5930 }
5931 DialectType::DuckDB => {
5932 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
5933 Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args))))
5934 }
5935 DialectType::BigQuery => {
5936 // BigQuery: a ^ b (caret operator for XOR)
5937 let mut args = f.args;
5938 let first = args.remove(0);
5939 let second = args.remove(0);
5940 let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
5941 for arg in args {
5942 result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
5943 }
5944 Ok(result)
5945 }
5946 _ => Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args)))),
5947 }
5948 }
// ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive,
// ARRAY_SORT(x, comparator lambda) for Presto/Trino/Athena.
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // SORT_ARRAY's second argument FALSE requests descending order.
            let mut args = f.args;
            args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
            Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // i.e. an inverted comparator to obtain descending order.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    Identifier::new("a"),
                    Identifier::new("b"),
                ],
                colon: false,
                parameter_types: Vec::new(),
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(Identifier::new("a")),
                                Expression::Identifier(Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(Identifier::new("a")),
                                Expression::Identifier(Identifier::new("b")),
                            ))),
                            // -1 is modeled as unary negation of 1 here.
                            Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                this: Expression::number(1),
                            })),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                })),
            }));
            Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
        }
        // Other targets keep the name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
    }
}
// ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
"ENCODE" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Spark/Hive ENCODE requires an explicit charset argument.
            let mut args = f.args;
            args.push(Expression::string("utf-8"));
            Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new("TO_UTF8".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), f.args)))),
    }
}
// DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
"DECODE" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Mirror of ENCODE above: add the explicit charset argument.
            let mut args = f.args;
            args.push(Expression::string("utf-8"));
            Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new("FROM_UTF8".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), f.args)))),
    }
}
// QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive; approximate or
// continuous-percentile variants for Presto/Trino and BigQuery.
"QUANTILE" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE",
        DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
        DialectType::BigQuery => "PERCENTILE_CONT",
        _ => "QUANTILE",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
// for PostgreSQL/Redshift/Snowflake; DuckDB keeps the original form.
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile]))))
        }
        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileCont(Box::new(crate::expressions::PercentileFunc {
                this: column.clone(),
                percentile: quantile,
                order_by: None,
                filter: None,
            }));
            // The ordered-set aggregate orders by the measured column itself.
            Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                this: inner,
                order_by: vec![crate::expressions::Ordered {
                    this: column,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                }],
            })))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile])))),
    }
}
// QUANTILE_DISC(x, q): discrete analogue of QUANTILE_CONT above.
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile]))))
        }
        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(crate::expressions::PercentileFunc {
                this: column.clone(),
                percentile: quantile,
                order_by: None,
                filter: None,
            }));
            Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                this: inner,
                order_by: vec![crate::expressions::Ordered {
                    this: column,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                }],
            })))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile])))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific name.
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        DialectType::PostgreSQL | DialectType::Redshift => "PERCENTILE_CONT",
        // Unknown target: keep whatever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive, TO_UNIXTIME(x) for Presto/Trino.
"EPOCH" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
6115 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
6116 "EPOCH_MS" if f.args.len() == 1 => {
6117 match target {
6118 DialectType::Spark | DialectType::Databricks => {
6119 Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), f.args))))
6120 }
6121 DialectType::Hive => {
6122 // Hive: FROM_UNIXTIME(x / 1000)
6123 let arg = f.args.into_iter().next().unwrap();
6124 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
6125 arg,
6126 Expression::number(1000),
6127 )));
6128 Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div_expr]))))
6129 }
6130 DialectType::Presto | DialectType::Trino => {
6131 Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(),
6132 vec![Expression::Div(Box::new(crate::expressions::BinaryOp::new(
6133 f.args.into_iter().next().unwrap(),
6134 Expression::number(1000),
6135 )))]
6136 ))))
6137 }
6138 _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), f.args)))),
6139 }
6140 }
6141 // HASHBYTES('algorithm', x) -> target-specific hash function
6142 "HASHBYTES" if f.args.len() == 2 => {
6143 // Keep HASHBYTES as-is for TSQL target
6144 if matches!(target, DialectType::TSQL) {
6145 return Ok(Expression::Function(f));
6146 }
6147 let algo_expr = &f.args[0];
6148 let algo = match algo_expr {
6149 Expression::Literal(crate::expressions::Literal::String(s)) => s.to_uppercase(),
6150 _ => return Ok(Expression::Function(f)),
6151 };
6152 let data_arg = f.args.into_iter().nth(1).unwrap();
6153 match algo.as_str() {
6154 "SHA1" => {
6155 let name = match target {
6156 DialectType::Spark | DialectType::Databricks => "SHA",
6157 DialectType::Hive => "SHA1",
6158 _ => "SHA1",
6159 };
6160 Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![data_arg]))))
6161 }
6162 "SHA2_256" => {
6163 Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(256)]))))
6164 }
6165 "SHA2_512" => {
6166 Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(512)]))))
6167 }
6168 "MD5" => {
6169 Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![data_arg]))))
6170 }
6171 _ => Ok(Expression::Function(Box::new(Function::new("HASHBYTES".to_string(), vec![Expression::string(&algo), data_arg])))),
6172 }
6173 }
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction.
// The _TEXT variant extracts a scalar string; the plain variant keeps JSON.
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build a JSONPath from the remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(crate::expressions::Literal::String(s)) => {
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a placeholder step.
                // NOTE(review): dynamic keys cannot be encoded in a static
                // JSONPath; ".?" is a lossy stand-in — confirm intended.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // PostgreSQL/Materialize keep the variadic key form, so the
            // original key arguments are passed through unchanged.
            let func_name = if is_text { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Rendered via arrow syntax: -> for JSON, ->> for text extraction.
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path: Expression::string(&json_path),
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                })))
            } else {
                Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path: Expression::string(&json_path),
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                })))
            }
        }
        DialectType::Redshift => {
            // Redshift only exposes the _TEXT form; keep the variadic keys.
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path')):
            // JSON_QUERY handles objects/arrays, JSON_VALUE handles scalars,
            // so coalescing the two covers both cases.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys as separate arguments, not a path string.
            let func_name = if is_text { "JSONExtractString" } else { "JSONExtractRaw" };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Default: the Presto-style path-string form.
            let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
// APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
"APPROX_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
        _ => "APPROX_DISTINCT",
    };
    let mut args = f.args;
    // Hive's APPROX_COUNT_DISTINCT does not accept the optional accuracy
    // parameter, so drop everything past the first argument.
    if name == "APPROX_COUNT_DISTINCT" && matches!(target, DialectType::Hive) {
        args.truncate(1);
    }
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
}
// REGEXP_EXTRACT(x, pattern) - normalize default group index
//
// With only two arguments, dialects disagree on which capture group is
// extracted implicitly: Presto/Trino/DuckDB (and BigQuery as a target)
// default to group 0 (the whole match), while Hive/Spark/Databricks
// default to group 1 (the first capture group). When source and target
// defaults differ, the source's default is appended as an explicit third
// argument so the target extracts the same group.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto | DialectType::Trino | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto | DialectType::Trino | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            // NOTE: this `return` exits the enclosing transform function,
            // not just this match arm.
            return Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), f.args))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Make the source's implicit group explicit in the rewritten call.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
    } else {
        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), f.args))))
    }
}
6338 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
6339 "RLIKE" if f.args.len() == 2 => {
6340 let mut args = f.args;
6341 let str_expr = args.remove(0);
6342 let pattern = args.remove(0);
6343 match target {
6344 DialectType::DuckDB => {
6345 // REGEXP_MATCHES(str, pattern)
6346 Ok(Expression::Function(Box::new(Function::new(
6347 "REGEXP_MATCHES".to_string(),
6348 vec![str_expr, pattern],
6349 ))))
6350 }
6351 _ => {
6352 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
6353 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
6354 this: str_expr,
6355 pattern,
6356 flags: None,
6357 })))
6358 }
6359 }
6360 }
// EOMONTH(date[, month_offset]) -> target-specific "last day of month" expression.
//
// TSQL's EOMONTH returns the last day of the month containing `date`,
// optionally shifted by `month_offset` months. Each branch below reproduces
// that semantic with the target's own date functions; when an offset is
// present it is applied (by adding `offset` months) before snapping to the
// end of the month.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    let date_arg = args.remove(0);
    // Optional second argument: months to shift before taking end-of-month.
    let month_offset = if !args.is_empty() { Some(args.remove(0)) } else { None };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // DATEADD takes the unit as a bare identifier, not a string.
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(), vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset, date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![date]))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            // The intermediate TIMESTAMP cast lets string inputs parse before
            // narrowing to DATE.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                // Presto's DATE_ADD takes the unit as a string literal.
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![
                        Expression::string("MONTH"),
                        offset, date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![date]))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset expression is rendered to SQL text so it can be
                // embedded inside the INTERVAL string literal.
                let interval_str = format!("{} MONTH", Self::expr_to_string_static(&offset));
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::string(&interval_str)),
                        unit: None,
                    })),
                )))
            } else {
                date
            };
            // Truncate to the first of the month, step one month forward,
            // then back one day: yields the last day of the (shifted) month.
            let truncated = Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(), vec![Expression::string("MONTH"), date],
            )));
            let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                truncated,
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::string("1 MONTH")),
                    unit: None,
                })),
            )));
            let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                plus_month,
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::string("1 DAY")),
                    unit: None,
                })),
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val = if matches!(&offset, Expression::Neg(_)) {
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: offset,
                        trailing_comments: Vec::new(),
                    }))
                } else {
                    offset
                };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            // Snowflake prefers TO_DATE; Redshift uses a plain DATE cast.
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(), vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset, date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                    this: date_arg,
                    interval: offset,
                    unit: iu,
                }))
            } else {
                Expression::Function(Box::new(Function::new("DATE".to_string(), vec![date_arg])))
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            // Nullable(DATE) is emitted as an opaque custom type name.
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Custom { name: "Nullable(DATE)".to_string() },
                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset, date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date); offset applied via ADD_MONTHS.
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
        _ => {
            // Default: LAST_DAY(date), with a generic DATEADD for the offset.
            let date = if let Some(offset) = month_offset {
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(), vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
        }
    }
}
6591 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
6592 "LAST_DAY" | "LAST_DAY_OF_MONTH" if !matches!(source, DialectType::BigQuery) && f.args.len() >= 1 => {
6593 let first_arg = f.args.into_iter().next().unwrap();
6594 match target {
6595 DialectType::TSQL | DialectType::Fabric => Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![first_arg])))),
6596 DialectType::Presto | DialectType::Trino | DialectType::Athena => Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![first_arg])))),
6597 _ => Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![first_arg])))),
6598 }
6599 }
6600 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
6601 "MAP" if f.args.len() == 2
6602 && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6603 let keys_arg = f.args[0].clone();
6604 let vals_arg = f.args[1].clone();
6605
6606 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
6607 fn extract_array_elements(expr: &Expression) -> Option<&Vec<Expression>> {
6608 match expr {
6609 Expression::Array(arr) => Some(&arr.expressions),
6610 Expression::ArrayFunc(arr) => Some(&arr.expressions),
6611 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => Some(&f.args),
6612 _ => None,
6613 }
6614 }
6615
6616 match target {
6617 DialectType::Spark | DialectType::Databricks => {
6618 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
6619 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
6620 }
6621 DialectType::Hive => {
6622 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
6623 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6624 if keys.len() == vals.len() {
6625 let mut interleaved = Vec::new();
6626 for (k, v) in keys.iter().zip(vals.iter()) {
6627 interleaved.push(k.clone());
6628 interleaved.push(v.clone());
6629 }
6630 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), interleaved))))
6631 } else {
6632 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6633 }
6634 } else {
6635 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6636 }
6637 }
6638 DialectType::Snowflake => {
6639 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
6640 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6641 if keys.len() == vals.len() {
6642 let mut interleaved = Vec::new();
6643 for (k, v) in keys.iter().zip(vals.iter()) {
6644 interleaved.push(k.clone());
6645 interleaved.push(v.clone());
6646 }
6647 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), interleaved))))
6648 } else {
6649 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6650 }
6651 } else {
6652 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6653 }
6654 }
6655 _ => Ok(Expression::Function(f)),
6656 }
6657 }
6658 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
6659 "MAP" if f.args.is_empty()
6660 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks)
6661 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6662 let empty_keys = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6663 let empty_vals = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6664 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![empty_keys, empty_vals]))))
6665 }
6666 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
6667 "MAP" if f.args.len() >= 2 && f.args.len() % 2 == 0
6668 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::ClickHouse) => {
6669 let args = f.args;
6670 match target {
6671 DialectType::DuckDB => {
6672 // MAP([k1, k2], [v1, v2])
6673 let mut keys = Vec::new();
6674 let mut vals = Vec::new();
6675 for (i, arg) in args.into_iter().enumerate() {
6676 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6677 }
6678 let keys_arr = Expression::Array(Box::new(crate::expressions::Array {
6679 expressions: keys,
6680 }));
6681 let vals_arr = Expression::Array(Box::new(crate::expressions::Array {
6682 expressions: vals,
6683 }));
6684 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6685 }
6686 DialectType::Presto | DialectType::Trino => {
6687 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
6688 let mut keys = Vec::new();
6689 let mut vals = Vec::new();
6690 for (i, arg) in args.into_iter().enumerate() {
6691 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6692 }
6693 let keys_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: keys }));
6694 let vals_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: vals }));
6695 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6696 }
6697 DialectType::Snowflake => {
6698 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), args))))
6699 }
6700 DialectType::ClickHouse => {
6701 Ok(Expression::Function(Box::new(Function::new("map".to_string(), args))))
6702 }
6703 _ => Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), args)))),
6704 }
6705 }
6706 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
6707 "COLLECT_LIST" if f.args.len() >= 1 => {
6708 let name = match target {
6709 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_LIST",
6710 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift
6711 | DialectType::Snowflake | DialectType::BigQuery => "ARRAY_AGG",
6712 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
6713 _ => "ARRAY_AGG",
6714 };
6715 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6716 }
6717 // COLLECT_SET(x) -> target-specific distinct array aggregation
6718 "COLLECT_SET" if f.args.len() >= 1 => {
6719 let name = match target {
6720 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_SET",
6721 DialectType::Presto | DialectType::Trino | DialectType::Athena => "SET_AGG",
6722 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
6723 _ => "ARRAY_AGG",
6724 };
6725 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6726 }
6727 // ISNAN(x) / IS_NAN(x) - normalize
6728 "ISNAN" | "IS_NAN" => {
6729 let name = match target {
6730 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "ISNAN",
6731 DialectType::Presto | DialectType::Trino | DialectType::Athena => "IS_NAN",
6732 DialectType::BigQuery | DialectType::PostgreSQL | DialectType::Redshift => "IS_NAN",
6733 DialectType::ClickHouse => "IS_NAN",
6734 _ => "ISNAN",
6735 };
6736 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6737 }
6738 // SPLIT_PART(str, delim, index) -> target-specific
6739 "SPLIT_PART" if f.args.len() == 3 => {
6740 match target {
6741 DialectType::Spark | DialectType::Databricks => {
6742 // Keep as SPLIT_PART (Spark 3.4+)
6743 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6744 }
6745 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Snowflake
6746 | DialectType::Redshift | DialectType::Trino | DialectType::Presto => {
6747 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6748 }
6749 DialectType::Hive => {
6750 // SPLIT(str, delim)[index]
6751 // Complex conversion, just keep as-is for now
6752 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6753 }
6754 _ => Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args)))),
6755 }
6756 }
// JSON_EXTRACT(json, path) -> target-specific JSON extraction.
// JSON_EXTRACT_SCALAR additionally requests the scalar (text) variant
// where the target distinguishes the two.
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY") && inner.args.len() == 1 {
                    // Clone the inner argument list so the borrow of `args`
                    // ends before we write back into args[0].
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow operator rather than a function call).
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: json_expr,
                path,
                returning: None,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: None,
                quotes_option: None,
                on_scalar_string: false,
                on_error: None,
            })))
        }
        DialectType::TSQL => {
            // TSQL splits extraction into JSON_VALUE (scalars) and JSON_QUERY (objects/arrays).
            let func_name = if is_scalar { "JSON_VALUE" } else { "JSON_QUERY" };
            Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // _TEXT variant returns text; the plain variant returns JSON.
            let func_name = if is_scalar { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
            Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
        }
        _ => {
            // Default: keep whichever name the source used.
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
        }
    }
}
// SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
// BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
//
// SingleStore passes each path step as a separate argument; the rewrite
// folds them into a single JSONPath string, treating purely numeric string
// keys as array subscripts.
"JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON" if f.args.len() >= 2
    && matches!(source, DialectType::SingleStore) => {
    let is_bson = name == "BSON_EXTRACT_BSON";
    let mut args = f.args;
    let json_expr = args.remove(0);

    // Build JSONPath from remaining arguments
    let mut path = String::from("$");
    for arg in &args {
        if let Expression::Literal(crate::expressions::Literal::String(s)) = arg {
            // Check if it's a numeric string (array index)
            if s.parse::<i64>().is_ok() {
                path.push('[');
                path.push_str(s);
                path.push(']');
            } else {
                path.push('.');
                path.push_str(s);
            }
        }
        // NOTE(review): arguments that are not string literals are silently
        // skipped when building the path — confirm this lossy behavior is
        // intended for dynamic path expressions.
    }

    let target_func = if is_bson { "JSONB_EXTRACT" } else { "JSON_EXTRACT" };
    Ok(Expression::Function(Box::new(Function::new(
        target_func.to_string(),
        vec![json_expr, Expression::string(&path)],
    ))))
}
6836 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
6837 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
6838 Ok(Expression::Function(Box::new(Function {
6839 name: "arraySum".to_string(),
6840 args: f.args,
6841 distinct: f.distinct,
6842 trailing_comments: f.trailing_comments,
6843 use_bracket_syntax: f.use_bracket_syntax,
6844 no_parens: f.no_parens,
6845 quoted: f.quoted,
6846 })))
6847 }
6848 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
6849 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
6850 // and is handled by JsonQueryValueConvert action. This handles the case where
6851 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
6852 "JSON_QUERY" | "JSON_VALUE" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
6853 match target {
6854 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6855 Ok(Expression::Function(Box::new(Function::new(
6856 "GET_JSON_OBJECT".to_string(),
6857 f.args,
6858 ))))
6859 }
6860 _ => Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args)))),
6861 }
6862 }
// UNIX_TIMESTAMP(x) -> target-specific epoch-seconds conversion.
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family UNIX_TIMESTAMP parses string input with a fixed
    // 'yyyy-MM-dd HH:mm:ss' pattern; when the source is Hive-like the
    // branches below spell that parsing step out explicitly.
    let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
            )));
            Ok(Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![strptime]))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            // The COALESCE mirrors Hive's leniency: first try parsing the
            // value as a string; if that fails (e.g. x is already a
            // timestamp), format it with DATE_FORMAT and re-parse via
            // PARSE_DATETIME's Joda-style pattern.
            let cast_varchar = Expression::Cast(Box::new(crate::expressions::Cast {
                this: arg.clone(),
                to: DataType::VarChar { length: None, parenthesized_length: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let date_parse = Expression::Function(Box::new(Function::new(
                "DATE_PARSE".to_string(),
                vec![cast_varchar, Expression::string("%Y-%m-%d %T")],
            )));
            let try_expr = Expression::Function(Box::new(Function::new(
                "TRY".to_string(), vec![date_parse],
            )));
            let date_format = Expression::Function(Box::new(Function::new(
                "DATE_FORMAT".to_string(),
                vec![arg, Expression::string("%Y-%m-%d %T")],
            )));
            let parse_datetime = Expression::Function(Box::new(Function::new(
                "PARSE_DATETIME".to_string(),
                vec![date_format, Expression::string("yyyy-MM-dd HH:mm:ss")],
            )));
            let coalesce = Expression::Function(Box::new(Function::new(
                "COALESCE".to_string(), vec![try_expr, parse_datetime],
            )));
            Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![coalesce]))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Non-Hive source: a plain rename suffices.
            Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![arg]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), vec![arg])))),
    }
}
6912 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
6913 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => {
6914 match target {
6915 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6916 Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
6917 }
6918 _ => Ok(Expression::Function(Box::new(Function::new("TO_UNIX_TIMESTAMP".to_string(), f.args)))),
6919 }
6920 }
6921 // CURDATE() -> CURRENT_DATE
6922 "CURDATE" => {
6923 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
6924 }
6925 // CURTIME() -> CURRENT_TIME
6926 "CURTIME" => {
6927 Ok(Expression::CurrentTime(crate::expressions::CurrentTime { precision: None }))
6928 }
6929 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
6930 "ARRAY_SORT" if f.args.len() >= 1 => {
6931 match target {
6932 DialectType::Hive => {
6933 let mut args = f.args;
6934 args.truncate(1); // Drop lambda comparator
6935 Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
6936 }
6937 _ => Ok(Expression::Function(f)),
6938 }
6939 }
6940 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive
6941 "SORT_ARRAY" if f.args.len() == 1 => {
6942 match target {
6943 DialectType::Hive => Ok(Expression::Function(f)),
6944 _ => {
6945 Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), f.args))))
6946 }
6947 }
6948 }
// SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
"SORT_ARRAY" if f.args.len() == 2 => {
    // A literal FALSE second argument requests a descending sort.
    let is_desc = matches!(&f.args[1], Expression::Boolean(b) if !b.value);
    if is_desc {
        match target {
            DialectType::DuckDB => {
                // DuckDB has a dedicated descending sort builtin.
                Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
            }
            DialectType::Presto | DialectType::Trino => {
                let arr_arg = f.args.into_iter().next().unwrap();
                // Presto/Trino have no descending flag; build the comparator
                // lambda (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END,
                // which inverts the natural order. `a` and `b` are emitted as
                // bare column references inside the lambda body.
                let a = Expression::Column(crate::expressions::Column {
                    name: crate::expressions::Identifier::new("a"),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                });
                let b = Expression::Column(crate::expressions::Column {
                    name: crate::expressions::Identifier::new("b"),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                });
                let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                    operand: None,
                    whens: vec![
                        (Expression::Lt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
                         Expression::Literal(Literal::Number("1".to_string()))),
                        (Expression::Gt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
                         Expression::Literal(Literal::Number("-1".to_string()))),
                    ],
                    else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
                }));
                let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![
                        crate::expressions::Identifier::new("a"),
                        crate::expressions::Identifier::new("b"),
                    ],
                    body: case_expr,
                    colon: false,
                    parameter_types: Vec::new(),
                }));
                Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr_arg, lambda]))))
            }
            // Other targets: leave the call untouched (preserves node flags).
            _ => Ok(Expression::Function(f))
        }
    } else {
        // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x); the ascending flag is the
        // default everywhere, so it is dropped outside Hive.
        match target {
            DialectType::Hive => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
        }
    }
}
7002 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
7003 "LEFT" if f.args.len() == 2 => {
7004 match target {
7005 DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
7006 let x = f.args[0].clone();
7007 let n = f.args[1].clone();
7008 Ok(Expression::Function(Box::new(Function::new(
7009 "SUBSTRING".to_string(),
7010 vec![x, Expression::number(1), n],
7011 ))))
7012 }
7013 DialectType::Spark | DialectType::Databricks
7014 if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
7015 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
7016 let x = f.args[0].clone();
7017 let n = f.args[1].clone();
7018 let cast_x = Expression::Cast(Box::new(Cast {
7019 this: x,
7020 to: DataType::VarChar { length: None, parenthesized_length: false },
7021 double_colon_syntax: false,
7022 trailing_comments: Vec::new(),
7023 format: None,
7024 default: None,
7025 }));
7026 Ok(Expression::Function(Box::new(Function::new("LEFT".to_string(), vec![cast_x, n]))))
7027 }
7028 _ => Ok(Expression::Function(f)),
7029 }
7030 }
// RIGHT(x, n): targets without a RIGHT builtin get an equivalent SUBSTRING;
// TSQL -> Spark additionally casts the subject to a string type.
"RIGHT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // SUBSTRING(x, LENGTH(x) - (n - 1)): starting that many
            // characters from the end yields the last n characters.
            let len_x = Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![x.clone()],
            )));
            let n_minus_1 = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                n,
                Expression::number(1),
            )));
            // Parenthesize (n - 1) so the generated SQL subtracts the whole
            // quantity rather than rendering LENGTH(x) - n - 1.
            let n_minus_1_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                this: n_minus_1,
                trailing_comments: Vec::new(),
            }));
            let offset = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                len_x,
                n_minus_1_paren,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, offset],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
            // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark,
            // making TSQL's implicit string coercion explicit.
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar { length: None, parenthesized_length: false },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("RIGHT".to_string(), vec![cast_x, n]))))
        }
        // Other targets support RIGHT natively; pass the node through untouched.
        _ => Ok(Expression::Function(f)),
    }
}
7076 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
7077 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
7078 match target {
7079 DialectType::Snowflake => {
7080 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
7081 }
7082 DialectType::Spark | DialectType::Databricks => {
7083 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
7084 }
7085 _ => {
7086 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
7087 }
7088 }
7089 }
7090 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
7091 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
7092 "LIKE" if f.args.len() >= 2 => {
7093 let (this, pattern) = if matches!(source, DialectType::SQLite) {
7094 // SQLite: LIKE(pattern, string) -> string LIKE pattern
7095 (f.args[1].clone(), f.args[0].clone())
7096 } else {
7097 // Standard: LIKE(string, pattern) -> string LIKE pattern
7098 (f.args[0].clone(), f.args[1].clone())
7099 };
7100 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7101 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7102 left: this,
7103 right: pattern,
7104 escape,
7105 quantifier: None,
7106 })))
7107 }
7108 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
7109 "ILIKE" if f.args.len() >= 2 => {
7110 let this = f.args[0].clone();
7111 let pattern = f.args[1].clone();
7112 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7113 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
7114 left: this,
7115 right: pattern,
7116 escape,
7117 quantifier: None,
7118 })))
7119 }
7120 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
7121 "CHAR" if f.args.len() == 1 => {
7122 match target {
7123 DialectType::MySQL | DialectType::SingleStore
7124 | DialectType::TSQL => Ok(Expression::Function(f)),
7125 _ => {
7126 Ok(Expression::Function(Box::new(Function::new("CHR".to_string(), f.args))))
7127 }
7128 }
7129 }
7130 // CONCAT(a, b) -> a || b for PostgreSQL
7131 "CONCAT" if f.args.len() == 2 && matches!(target, DialectType::PostgreSQL)
7132 && matches!(source, DialectType::ClickHouse | DialectType::MySQL) => {
7133 let mut args = f.args;
7134 let right = args.pop().unwrap();
7135 let left = args.pop().unwrap();
7136 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
7137 this: Box::new(left),
7138 expression: Box::new(right),
7139 safe: None,
7140 })))
7141 }
7142 // ARRAY_TO_STRING(arr, delim) -> target-specific
7143 "ARRAY_TO_STRING" if f.args.len() >= 2 => {
7144 match target {
7145 DialectType::Presto | DialectType::Trino => {
7146 Ok(Expression::Function(Box::new(Function::new("ARRAY_JOIN".to_string(), f.args))))
7147 }
7148 DialectType::TSQL => {
7149 Ok(Expression::Function(Box::new(Function::new("STRING_AGG".to_string(), f.args))))
7150 }
7151 _ => Ok(Expression::Function(f)),
7152 }
7153 }
7154 // ARRAY_CONCAT -> target-specific
7155 "ARRAY_CONCAT" if f.args.len() == 2 => {
7156 match target {
7157 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7158 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
7159 }
7160 DialectType::Snowflake => {
7161 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
7162 }
7163 DialectType::Redshift => {
7164 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
7165 }
7166 DialectType::PostgreSQL => {
7167 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
7168 }
7169 DialectType::DuckDB => {
7170 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
7171 }
7172 DialectType::Presto | DialectType::Trino => {
7173 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
7174 }
7175 _ => Ok(Expression::Function(f)),
7176 }
7177 }
7178 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
7179 "HAS" if f.args.len() == 2 => {
7180 match target {
7181 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7182 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
7183 }
7184 DialectType::Presto | DialectType::Trino => {
7185 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
7186 }
7187 _ => Ok(Expression::Function(f)),
7188 }
7189 }
7190 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
7191 "NVL" if f.args.len() > 2 => {
7192 Ok(Expression::Function(Box::new(Function::new("COALESCE".to_string(), f.args))))
7193 }
7194 // ISNULL(x) in MySQL -> (x IS NULL)
7195 "ISNULL" if f.args.len() == 1 && matches!(source, DialectType::MySQL) && matches!(target, DialectType::MySQL) => {
7196 let arg = f.args.into_iter().next().unwrap();
7197 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
7198 this: Expression::IsNull(Box::new(crate::expressions::IsNull {
7199 this: arg,
7200 not: false,
7201 postfix_form: false,
7202 })),
7203 trailing_comments: Vec::new(),
7204 })))
7205 }
7206 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
7207 "MONTHNAME" if f.args.len() == 1 && matches!(target, DialectType::MySQL) => {
7208 let arg = f.args.into_iter().next().unwrap();
7209 Ok(Expression::Function(Box::new(Function::new(
7210 "DATE_FORMAT".to_string(),
7211 vec![arg, Expression::string("%M")],
7212 ))))
7213 }
7214 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
7215 "SPLITBYSTRING" if f.args.len() == 2 => {
7216 let sep = f.args[0].clone();
7217 let str_arg = f.args[1].clone();
7218 match target {
7219 DialectType::DuckDB => {
7220 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]))))
7221 }
7222 DialectType::Doris => {
7223 Ok(Expression::Function(Box::new(Function::new("SPLIT_BY_STRING".to_string(), vec![str_arg, sep]))))
7224 }
7225 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7226 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
7227 let escaped = Expression::Function(Box::new(Function::new(
7228 "CONCAT".to_string(),
7229 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
7230 )));
7231 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, escaped]))))
7232 }
7233 _ => Ok(Expression::Function(f)),
7234 }
7235 }
7236 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
7237 "SPLITBYREGEXP" if f.args.len() == 2 => {
7238 let sep = f.args[0].clone();
7239 let str_arg = f.args[1].clone();
7240 match target {
7241 DialectType::DuckDB => {
7242 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT_REGEX".to_string(), vec![str_arg, sep]))))
7243 }
7244 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7245 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, sep]))))
7246 }
7247 _ => Ok(Expression::Function(f)),
7248 }
7249 }
7250 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
7251 "TOMONDAY" => {
7252 if f.args.len() == 1 {
7253 let arg = f.args.into_iter().next().unwrap();
7254 match target {
7255 DialectType::Doris => {
7256 Ok(Expression::Function(Box::new(Function::new(
7257 "DATE_TRUNC".to_string(),
7258 vec![arg, Expression::string("WEEK")],
7259 ))))
7260 }
7261 _ => {
7262 Ok(Expression::Function(Box::new(Function::new(
7263 "DATE_TRUNC".to_string(),
7264 vec![Expression::string("WEEK"), arg],
7265 ))))
7266 }
7267 }
7268 } else {
7269 Ok(Expression::Function(f))
7270 }
7271 }
7272 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
7273 "COLLECT_LIST" if f.args.len() == 1 => {
7274 match target {
7275 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7276 Ok(Expression::Function(f))
7277 }
7278 _ => {
7279 Ok(Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args))))
7280 }
7281 }
7282 }
7283 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
7284 "TO_CHAR" if f.args.len() == 1 && matches!(target, DialectType::Doris) => {
7285 let arg = f.args.into_iter().next().unwrap();
7286 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7287 this: arg,
7288 to: DataType::Custom { name: "STRING".to_string() },
7289 double_colon_syntax: false,
7290 trailing_comments: Vec::new(),
7291 format: None,
7292 default: None,
7293 })))
7294 }
7295 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
7296 "DBMS_RANDOM.VALUE" if f.args.is_empty() => {
7297 match target {
7298 DialectType::PostgreSQL => {
7299 Ok(Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![]))))
7300 }
7301 _ => Ok(Expression::Function(f)),
7302 }
7303 }
7304 // ClickHouse formatDateTime -> target-specific
7305 "FORMATDATETIME" if f.args.len() >= 2 => {
7306 match target {
7307 DialectType::MySQL => {
7308 Ok(Expression::Function(Box::new(Function::new("DATE_FORMAT".to_string(), f.args))))
7309 }
7310 _ => Ok(Expression::Function(f)),
7311 }
7312 }
7313 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
7314 "REPLICATE" if f.args.len() == 2 => {
7315 match target {
7316 DialectType::TSQL => Ok(Expression::Function(f)),
7317 _ => {
7318 Ok(Expression::Function(Box::new(Function::new("REPEAT".to_string(), f.args))))
7319 }
7320 }
7321 }
7322 // LEN(x) -> LENGTH(x) for non-TSQL targets
7323 // No CAST needed when arg is already a string literal
7324 "LEN" if f.args.len() == 1 => {
7325 match target {
7326 DialectType::TSQL => Ok(Expression::Function(f)),
7327 DialectType::Spark | DialectType::Databricks => {
7328 let arg = f.args.into_iter().next().unwrap();
7329 // Don't wrap string literals with CAST - they're already strings
7330 let is_string = matches!(&arg, Expression::Literal(crate::expressions::Literal::String(_)));
7331 let final_arg = if is_string {
7332 arg
7333 } else {
7334 Expression::Cast(Box::new(Cast {
7335 this: arg,
7336 to: DataType::VarChar { length: None, parenthesized_length: false },
7337 double_colon_syntax: false,
7338 trailing_comments: Vec::new(),
7339 format: None,
7340 default: None,
7341 }))
7342 };
7343 Ok(Expression::Function(Box::new(Function::new(
7344 "LENGTH".to_string(),
7345 vec![final_arg],
7346 ))))
7347 }
7348 _ => {
7349 let arg = f.args.into_iter().next().unwrap();
7350 Ok(Expression::Function(Box::new(Function::new(
7351 "LENGTH".to_string(),
7352 vec![arg],
7353 ))))
7354 }
7355 }
7356 }
7357 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
7358 "COUNT_BIG" if f.args.len() == 1 => {
7359 match target {
7360 DialectType::TSQL => Ok(Expression::Function(f)),
7361 _ => {
7362 Ok(Expression::Function(Box::new(Function::new("COUNT".to_string(), f.args))))
7363 }
7364 }
7365 }
7366 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
7367 "DATEFROMPARTS" if f.args.len() == 3 => {
7368 match target {
7369 DialectType::TSQL => Ok(Expression::Function(f)),
7370 _ => {
7371 Ok(Expression::Function(Box::new(Function::new("MAKE_DATE".to_string(), f.args))))
7372 }
7373 }
7374 }
7375 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
7376 "REGEXP_LIKE" if f.args.len() >= 2 => {
7377 let str_expr = f.args[0].clone();
7378 let pattern = f.args[1].clone();
7379 let flags = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7380 match target {
7381 DialectType::DuckDB => {
7382 let mut new_args = vec![str_expr, pattern];
7383 if let Some(fl) = flags {
7384 new_args.push(fl);
7385 }
7386 Ok(Expression::Function(Box::new(Function::new(
7387 "REGEXP_MATCHES".to_string(),
7388 new_args,
7389 ))))
7390 }
7391 _ => {
7392 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
7393 this: str_expr,
7394 pattern,
7395 flags,
7396 })))
7397 }
7398 }
7399 }
7400 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
7401 "ARRAYJOIN" if f.args.len() == 1 => {
7402 match target {
7403 DialectType::PostgreSQL => {
7404 Ok(Expression::Function(Box::new(Function::new("UNNEST".to_string(), f.args))))
7405 }
7406 _ => Ok(Expression::Function(f)),
7407 }
7408 }
        // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
        // The trailing argument is milliseconds; targets differ in how sub-second
        // precision is expressed, so it is folded into the seconds argument
        // (DuckDB) or rescaled (Snowflake).
        "DATETIMEFROMPARTS" if f.args.len() == 7 => {
            match target {
                // TSQL keeps its native function untouched.
                DialectType::TSQL => Ok(Expression::Function(f)),
                DialectType::DuckDB => {
                    // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
                    let mut args = f.args;
                    // Pop in reverse: ms is the last arg, seconds second-to-last.
                    let ms = args.pop().unwrap();
                    let s = args.pop().unwrap();
                    // s + (ms / 1000.0) — the ".0" literal keeps the division
                    // fractional; the Paren preserves precedence in the output.
                    let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                        ms,
                        Expression::Literal(crate::expressions::Literal::Number("1000.0".to_string())),
                    )));
                    let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                        s,
                        Expression::Paren(Box::new(Paren { this: ms_frac, trailing_comments: vec![] })),
                    )));
                    args.push(s_with_ms);
                    Ok(Expression::Function(Box::new(Function::new("MAKE_TIMESTAMP".to_string(), args))))
                }
                DialectType::Snowflake => {
                    // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
                    let mut args = f.args;
                    let ms = args.pop().unwrap();
                    // ms * 1000000 — scale milliseconds up for Snowflake's
                    // sub-second argument.
                    let ns = Expression::Mul(Box::new(BinaryOp::new(
                        ms,
                        Expression::number(1000000),
                    )));
                    args.push(ns);
                    Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
                }
                _ => {
                    // Default: keep function name for other targets
                    Ok(Expression::Function(Box::new(Function::new("DATETIMEFROMPARTS".to_string(), f.args))))
                }
            }
        }
        // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
        // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
        // Note the TSQL argument order: the *type* comes first, the value second,
        // and the optional third argument is a numeric date "style" code.
        "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
            let is_try = name == "TRY_CONVERT";
            let type_expr = f.args[0].clone();
            let value_expr = f.args[1].clone();
            // Borrow the style; it is only cloned if it ends up being kept.
            let style = if f.args.len() >= 3 { Some(&f.args[2]) } else { None };

            // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
            if matches!(target, DialectType::TSQL) {
                // The type argument may have been parsed as a DataType node, a
                // bare identifier, or a column reference; normalize INT -> INTEGER
                // in all three shapes (identifiers/columns are also uppercased).
                let normalized_type = match &type_expr {
                    Expression::DataType(dt) => {
                        let new_dt = match dt {
                            DataType::Int { .. } => DataType::Custom { name: "INTEGER".to_string() },
                            _ => dt.clone(),
                        };
                        Expression::DataType(new_dt)
                    }
                    Expression::Identifier(id) => {
                        let upper = id.name.to_uppercase();
                        let normalized = match upper.as_str() {
                            "INT" => "INTEGER",
                            _ => &upper,
                        };
                        Expression::Identifier(crate::expressions::Identifier::new(normalized))
                    }
                    Expression::Column(col) => {
                        // A type name misparsed as a column ref is re-emitted as an identifier.
                        let upper = col.name.name.to_uppercase();
                        let normalized = match upper.as_str() {
                            "INT" => "INTEGER",
                            _ => &upper,
                        };
                        Expression::Identifier(crate::expressions::Identifier::new(normalized))
                    }
                    _ => type_expr.clone(),
                };
                let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
                let mut new_args = vec![normalized_type, value_expr];
                if let Some(s) = style {
                    new_args.push(s.clone());
                }
                // Early return: the generic CAST lowering below must not run for TSQL.
                return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), new_args))));
            }
7491
7492 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
7493 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
7494 match e {
7495 Expression::DataType(dt) => {
7496 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
7497 match dt {
7498 DataType::Custom { name } if name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(") => {
7499 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
7500 let inner = &name[name.find('(').unwrap()+1..name.len()-1];
7501 if inner.eq_ignore_ascii_case("MAX") {
7502 Some(DataType::Text)
7503 } else if let Ok(len) = inner.parse::<u32>() {
7504 if name.starts_with("NCHAR") {
7505 Some(DataType::Char { length: Some(len) })
7506 } else {
7507 Some(DataType::VarChar { length: Some(len), parenthesized_length: false })
7508 }
7509 } else {
7510 Some(dt.clone())
7511 }
7512 }
7513 DataType::Custom { name } if name == "NVARCHAR" => {
7514 Some(DataType::VarChar { length: None, parenthesized_length: false })
7515 }
7516 DataType::Custom { name } if name == "NCHAR" => {
7517 Some(DataType::Char { length: None })
7518 }
7519 DataType::Custom { name } if name == "NVARCHAR(MAX)" || name == "VARCHAR(MAX)" => {
7520 Some(DataType::Text)
7521 }
7522 _ => Some(dt.clone()),
7523 }
7524 }
7525 Expression::Identifier(id) => {
7526 let name = id.name.to_uppercase();
7527 match name.as_str() {
7528 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7529 "BIGINT" => Some(DataType::BigInt { length: None }),
7530 "SMALLINT" => Some(DataType::SmallInt { length: None }),
7531 "TINYINT" => Some(DataType::TinyInt { length: None }),
7532 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7533 "REAL" => Some(DataType::Float { precision: None, scale: None, real_spelling: true }),
7534 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7535 "DATE" => Some(DataType::Date),
7536 "BIT" => Some(DataType::Boolean),
7537 "TEXT" => Some(DataType::Text),
7538 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7539 "MONEY" => Some(DataType::Decimal { precision: Some(15), scale: Some(4) }),
7540 "SMALLMONEY" => Some(DataType::Decimal { precision: Some(6), scale: Some(4) }),
7541 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7542 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7543 "CHAR" => Some(DataType::Char { length: None }),
7544 "NCHAR" => Some(DataType::Char { length: None }),
7545 _ => Some(DataType::Custom { name }),
7546 }
7547 }
7548 Expression::Column(col) => {
7549 let name = col.name.name.to_uppercase();
7550 match name.as_str() {
7551 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7552 "BIGINT" => Some(DataType::BigInt { length: None }),
7553 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7554 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7555 "DATE" => Some(DataType::Date),
7556 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7557 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7558 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7559 "CHAR" => Some(DataType::Char { length: None }),
7560 "NCHAR" => Some(DataType::Char { length: None }),
7561 _ => Some(DataType::Custom { name }),
7562 }
7563 }
7564 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
7565 Expression::Function(f) => {
7566 let fname = f.name.to_uppercase();
7567 match fname.as_str() {
7568 "VARCHAR" | "NVARCHAR" => {
7569 let len = f.args.first().and_then(|a| {
7570 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7571 n.parse::<u32>().ok()
7572 } else if let Expression::Identifier(id) = a {
7573 if id.name.eq_ignore_ascii_case("MAX") { None } else { None }
7574 } else { None }
7575 });
7576 // Check for VARCHAR(MAX) -> TEXT
7577 let is_max = f.args.first().map_or(false, |a| {
7578 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
7579 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
7580 });
7581 if is_max {
7582 Some(DataType::Text)
7583 } else {
7584 Some(DataType::VarChar { length: len, parenthesized_length: false })
7585 }
7586 }
7587 "NCHAR" | "CHAR" => {
7588 let len = f.args.first().and_then(|a| {
7589 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7590 n.parse::<u32>().ok()
7591 } else { None }
7592 });
7593 Some(DataType::Char { length: len })
7594 }
7595 "NUMERIC" | "DECIMAL" => {
7596 let precision = f.args.first().and_then(|a| {
7597 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7598 n.parse::<u32>().ok()
7599 } else { None }
7600 });
7601 let scale = f.args.get(1).and_then(|a| {
7602 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7603 n.parse::<u32>().ok()
7604 } else { None }
7605 });
7606 Some(DataType::Decimal { precision, scale })
7607 }
7608 _ => None,
7609 }
7610 }
7611 _ => None,
7612 }
7613 }
7614
7615 if let Some(mut dt) = expr_to_datatype(&type_expr) {
7616 // For TSQL source: VARCHAR/CHAR without length defaults to 30
7617 let is_tsql_source = matches!(source, DialectType::TSQL | DialectType::Fabric);
7618 if is_tsql_source {
7619 match &dt {
7620 DataType::VarChar { length: None, .. } => {
7621 dt = DataType::VarChar { length: Some(30), parenthesized_length: false };
7622 }
7623 DataType::Char { length: None } => {
7624 dt = DataType::Char { length: Some(30) };
7625 }
7626 _ => {}
7627 }
7628 }
7629
7630 // Determine if this is a string type
7631 let is_string_type = matches!(dt, DataType::VarChar { .. } | DataType::Char { .. } | DataType::Text)
7632 || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
7633 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
7634 || name.starts_with("VARCHAR(") || name == "VARCHAR"
7635 || name == "STRING");
7636
7637 // Determine if this is a date/time type
7638 let is_datetime_type = matches!(dt, DataType::Timestamp { .. } | DataType::Date)
7639 || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
7640 || name == "DATETIME2" || name == "SMALLDATETIME");
7641
7642 // Check for date conversion with style
7643 if style.is_some() {
7644 let style_num = style.and_then(|s| {
7645 if let Expression::Literal(crate::expressions::Literal::Number(n)) = s {
7646 n.parse::<u32>().ok()
7647 } else { None }
7648 });
7649
7650 // TSQL CONVERT date styles (Java format)
7651 let format_str = style_num.and_then(|n| match n {
7652 101 => Some("MM/dd/yyyy"),
7653 102 => Some("yyyy.MM.dd"),
7654 103 => Some("dd/MM/yyyy"),
7655 104 => Some("dd.MM.yyyy"),
7656 105 => Some("dd-MM-yyyy"),
7657 108 => Some("HH:mm:ss"),
7658 110 => Some("MM-dd-yyyy"),
7659 112 => Some("yyyyMMdd"),
7660 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
7661 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
7662 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
7663 _ => None,
7664 });
7665
7666 // Non-string, non-datetime types with style: just CAST, ignore the style
7667 if !is_string_type && !is_datetime_type {
7668 let cast_expr = if is_try {
7669 Expression::TryCast(Box::new(crate::expressions::Cast {
7670 this: value_expr,
7671 to: dt,
7672 trailing_comments: Vec::new(),
7673 double_colon_syntax: false,
7674 format: None,
7675 default: None,
7676 }))
7677 } else {
7678 Expression::Cast(Box::new(crate::expressions::Cast {
7679 this: value_expr,
7680 to: dt,
7681 trailing_comments: Vec::new(),
7682 double_colon_syntax: false,
7683 format: None,
7684 default: None,
7685 }))
7686 };
7687 return Ok(cast_expr);
7688 }
7689
7690 if let Some(java_fmt) = format_str {
7691 let c_fmt = java_fmt
7692 .replace("yyyy", "%Y")
7693 .replace("MM", "%m")
7694 .replace("dd", "%d")
7695 .replace("HH", "%H")
7696 .replace("mm", "%M")
7697 .replace("ss", "%S")
7698 .replace("SSSSSS", "%f")
7699 .replace("SSS", "%f")
7700 .replace("'T'", "T");
7701
7702 // For datetime target types: style is the INPUT format for parsing strings -> dates
7703 if is_datetime_type {
7704 match target {
7705 DialectType::DuckDB => {
7706 return Ok(Expression::Function(Box::new(Function::new(
7707 "STRPTIME".to_string(),
7708 vec![value_expr, Expression::string(&c_fmt)],
7709 ))));
7710 }
7711 DialectType::Spark | DialectType::Databricks => {
7712 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
7713 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
7714 let func_name = if matches!(dt, DataType::Date) {
7715 "TO_DATE"
7716 } else {
7717 "TO_TIMESTAMP"
7718 };
7719 return Ok(Expression::Function(Box::new(Function::new(
7720 func_name.to_string(),
7721 vec![value_expr, Expression::string(java_fmt)],
7722 ))));
7723 }
7724 DialectType::Hive => {
7725 return Ok(Expression::Function(Box::new(Function::new(
7726 "TO_TIMESTAMP".to_string(),
7727 vec![value_expr, Expression::string(java_fmt)],
7728 ))));
7729 }
7730 _ => {
7731 return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7732 this: value_expr,
7733 to: dt,
7734 trailing_comments: Vec::new(),
7735 double_colon_syntax: false,
7736 format: None,
7737 default: None,
7738 })));
7739 }
7740 }
7741 }
7742
7743 // For string target types: style is the OUTPUT format for dates -> strings
7744 match target {
7745 DialectType::DuckDB => {
7746 Ok(Expression::Function(Box::new(Function::new(
7747 "STRPTIME".to_string(),
7748 vec![value_expr, Expression::string(&c_fmt)],
7749 ))))
7750 }
7751 DialectType::Spark | DialectType::Databricks => {
7752 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
7753 // Determine the target string type
7754 let string_dt = match &dt {
7755 DataType::VarChar { length: Some(l), .. } => DataType::VarChar { length: Some(*l), parenthesized_length: false },
7756 DataType::Text => DataType::Custom { name: "STRING".to_string() },
7757 _ => DataType::Custom { name: "STRING".to_string() },
7758 };
7759 let date_format_expr = Expression::Function(Box::new(Function::new(
7760 "DATE_FORMAT".to_string(),
7761 vec![value_expr, Expression::string(java_fmt)],
7762 )));
7763 let cast_expr = if is_try {
7764 Expression::TryCast(Box::new(crate::expressions::Cast {
7765 this: date_format_expr,
7766 to: string_dt,
7767 trailing_comments: Vec::new(),
7768 double_colon_syntax: false,
7769 format: None,
7770 default: None,
7771 }))
7772 } else {
7773 Expression::Cast(Box::new(crate::expressions::Cast {
7774 this: date_format_expr,
7775 to: string_dt,
7776 trailing_comments: Vec::new(),
7777 double_colon_syntax: false,
7778 format: None,
7779 default: None,
7780 }))
7781 };
7782 Ok(cast_expr)
7783 }
7784 DialectType::MySQL | DialectType::SingleStore => {
7785 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
7786 let mysql_fmt = java_fmt
7787 .replace("yyyy", "%Y")
7788 .replace("MM", "%m")
7789 .replace("dd", "%d")
7790 .replace("HH:mm:ss.SSSSSS", "%T")
7791 .replace("HH:mm:ss", "%T")
7792 .replace("HH", "%H")
7793 .replace("mm", "%i")
7794 .replace("ss", "%S");
7795 let date_format_expr = Expression::Function(Box::new(Function::new(
7796 "DATE_FORMAT".to_string(),
7797 vec![value_expr, Expression::string(&mysql_fmt)],
7798 )));
7799 // MySQL uses CHAR for string casts
7800 let mysql_dt = match &dt {
7801 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7802 _ => dt,
7803 };
7804 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7805 this: date_format_expr,
7806 to: mysql_dt,
7807 trailing_comments: Vec::new(),
7808 double_colon_syntax: false,
7809 format: None,
7810 default: None,
7811 })))
7812 }
                        DialectType::Hive => {
                            // NOTE(review): for a string target type the style is an
                            // OUTPUT format (date -> string), but TO_TIMESTAMP parses a
                            // string into a timestamp. Hive's formatter is DATE_FORMAT —
                            // confirm whether this should emit DATE_FORMAT(value, fmt).
                            let func_name = "TO_TIMESTAMP";
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                vec![value_expr, Expression::string(java_fmt)],
                            ))))
                        }
7820 _ => {
7821 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7822 this: value_expr,
7823 to: dt,
7824 trailing_comments: Vec::new(),
7825 double_colon_syntax: false,
7826 format: None,
7827 default: None,
7828 })))
7829 }
7830 }
7831 } else {
7832 // Unknown style, just CAST
7833 let cast_expr = if is_try {
7834 Expression::TryCast(Box::new(crate::expressions::Cast {
7835 this: value_expr,
7836 to: dt,
7837 trailing_comments: Vec::new(),
7838 double_colon_syntax: false,
7839 format: None,
7840 default: None,
7841 }))
7842 } else {
7843 Expression::Cast(Box::new(crate::expressions::Cast {
7844 this: value_expr,
7845 to: dt,
7846 trailing_comments: Vec::new(),
7847 double_colon_syntax: false,
7848 format: None,
7849 default: None,
7850 }))
7851 };
7852 Ok(cast_expr)
7853 }
7854 } else {
7855 // No style - simple CAST
7856 let final_dt = if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7857 match &dt {
7858 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => {
7859 DataType::Custom { name: "SIGNED".to_string() }
7860 }
7861 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7862 _ => dt,
7863 }
7864 } else {
7865 dt
7866 };
7867 let cast_expr = if is_try {
7868 Expression::TryCast(Box::new(crate::expressions::Cast {
7869 this: value_expr,
7870 to: final_dt,
7871 trailing_comments: Vec::new(),
7872 double_colon_syntax: false,
7873 format: None,
7874 default: None,
7875 }))
7876 } else {
7877 Expression::Cast(Box::new(crate::expressions::Cast {
7878 this: value_expr,
7879 to: final_dt,
7880 trailing_comments: Vec::new(),
7881 double_colon_syntax: false,
7882 format: None,
7883 default: None,
7884 }))
7885 };
7886 Ok(cast_expr)
7887 }
7888 } else {
7889 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
7890 Ok(Expression::Function(f))
7891 }
7892 }
7893 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
7894 "STRFTIME" if f.args.len() == 2 => {
7895 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
7896 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
7897 // SQLite: args[0] = format, args[1] = value
7898 (f.args[1].clone(), &f.args[0])
7899 } else {
7900 // DuckDB and others: args[0] = value, args[1] = format
7901 (f.args[0].clone(), &f.args[1])
7902 };
7903
7904 // Helper to convert C-style format to Java-style
7905 fn c_to_java_format(fmt: &str) -> String {
7906 fmt.replace("%Y", "yyyy")
7907 .replace("%m", "MM")
7908 .replace("%d", "dd")
7909 .replace("%H", "HH")
7910 .replace("%M", "mm")
7911 .replace("%S", "ss")
7912 .replace("%f", "SSSSSS")
7913 .replace("%y", "yy")
7914 .replace("%-m", "M")
7915 .replace("%-d", "d")
7916 .replace("%-H", "H")
7917 .replace("%-I", "h")
7918 .replace("%I", "hh")
7919 .replace("%p", "a")
7920 .replace("%j", "DDD")
7921 .replace("%a", "EEE")
7922 .replace("%b", "MMM")
7923 .replace("%F", "yyyy-MM-dd")
7924 .replace("%T", "HH:mm:ss")
7925 }
7926
7927 // Helper: recursively convert format strings within expressions (handles CONCAT)
7928 fn convert_fmt_expr(expr: &Expression, converter: &dyn Fn(&str) -> String) -> Expression {
7929 match expr {
7930 Expression::Literal(crate::expressions::Literal::String(s)) => {
7931 Expression::string(&converter(s))
7932 }
7933 Expression::Function(func) if func.name.eq_ignore_ascii_case("CONCAT") => {
7934 let new_args: Vec<Expression> = func.args.iter()
7935 .map(|a| convert_fmt_expr(a, converter))
7936 .collect();
7937 Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args)))
7938 }
7939 other => other.clone(),
7940 }
7941 }
7942
                // Dispatch DuckDB-style STRFTIME(val, fmt) to the target dialect's
                // string-formatting function. Only string-literal formats can be
                // rewritten token-by-token; non-literal format expressions are passed
                // through (renamed where the argument order still matches).
                match target {
                    DialectType::DuckDB => {
                        if matches!(source, DialectType::SQLite) {
                            // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
                            // SQLite's argument order is (fmt, val); DuckDB's is (val, fmt),
                            // and DuckDB requires a TIMESTAMP operand, hence the CAST.
                            let cast_val = Expression::Cast(Box::new(Cast {
                                this: val,
                                to: crate::expressions::DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![cast_val, fmt_expr.clone()],
                            ))))
                        } else {
                            // Source already uses DuckDB's (val, fmt) order: keep unchanged.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Spark | DialectType::Databricks
                    | DialectType::Hive => {
                        // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
                        // C strftime tokens are rewritten to Java/SimpleDateFormat tokens.
                        let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, converted_fmt],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
                        // T-SQL FORMAT uses .NET format strings, which share the
                        // Java-style tokens produced by c_to_java_format.
                        let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(),
                            vec![val, converted_fmt],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let presto_fmt = duckdb_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&presto_fmt)],
                            ))))
                        } else {
                            // Non-literal format: rename only; tokens cannot be rewritten.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = duckdb_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
                        // Sequential substring replacement: plain specifiers are rewritten
                        // before their dashed (no-leading-zero) variants; the dashed forms
                        // do not contain the plain forms as substrings, so order is safe.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let pg_fmt = s
                                .replace("%Y", "YYYY")
                                .replace("%m", "MM")
                                .replace("%d", "DD")
                                .replace("%H", "HH24")
                                .replace("%M", "MI")
                                .replace("%S", "SS")
                                .replace("%y", "YY")
                                .replace("%-m", "FMMM")
                                .replace("%-d", "FMDD")
                                .replace("%-H", "FMHH24")
                                .replace("%-I", "FMHH12")
                                .replace("%p", "AM")
                                .replace("%F", "YYYY-MM-DD")
                                .replace("%T", "HH24:MI:SS");
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, Expression::string(&pg_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    // Any other target keeps STRFTIME untouched.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
            // (STRPTIME parses a string into a timestamp; fmt uses C strftime tokens).
            "STRPTIME" if f.args.len() == 2 => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];

8047 fn c_to_java_format_parse(fmt: &str) -> String {
8048 fmt.replace("%Y", "yyyy")
8049 .replace("%m", "MM")
8050 .replace("%d", "dd")
8051 .replace("%H", "HH")
8052 .replace("%M", "mm")
8053 .replace("%S", "ss")
8054 .replace("%f", "SSSSSS")
8055 .replace("%y", "yy")
8056 .replace("%-m", "M")
8057 .replace("%-d", "d")
8058 .replace("%-H", "H")
8059 .replace("%-I", "h")
8060 .replace("%I", "hh")
8061 .replace("%p", "a")
8062 .replace("%F", "yyyy-MM-dd")
8063 .replace("%T", "HH:mm:ss")
8064 }
8065
                // Dispatch STRPTIME to the target's string-to-timestamp parser.
                // String-literal formats are rewritten token-by-token; non-literal
                // formats are passed through with the function renamed where the
                // argument order still matches.
                match target {
                    // DuckDB is the home dialect for STRPTIME: no change needed.
                    DialectType::DuckDB => Ok(Expression::Function(f)),
                    DialectType::Spark | DialectType::Databricks => {
                        // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = c_to_java_format_parse(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::Hive => {
                        // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
                        // Hive has no direct parse-with-format function, so we parse to
                        // epoch seconds, render back to a string, and cast to TIMESTAMP.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = c_to_java_format_parse(s);
                            let unix_ts = Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            )));
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![unix_ts],
                            )));
                            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                this: from_unix,
                                to: DataType::Timestamp { timezone: false, precision: None },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        } else {
                            // NOTE(review): a non-literal format leaves STRPTIME in the
                            // Hive output, which Hive does not define — confirm a later
                            // pass handles this, or whether it should error.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let presto_fmt = duckdb_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![val, Expression::string(&presto_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = duckdb_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "PARSE_TIMESTAMP".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "PARSE_TIMESTAMP".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    // Other targets keep STRPTIME untouched.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
            // formatter. Presto's %-style tokens are rewritten into the target's
            // native token set; non-literal formats are left untouched.
            "DATE_FORMAT" if f.args.len() >= 2
                && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];

                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&normalized)],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                        // Convert Presto C-style to Java-style format
                        // (function name stays DATE_FORMAT; only tokens differ).
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::DuckDB => {
                        // Convert to STRFTIME(val, duckdb_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![val, Expression::string(&duckdb_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    // Other targets keep Presto's DATE_FORMAT unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function.
            // Mirrors the DATE_FORMAT arm above, but for parsing strings into
            // timestamps rather than formatting timestamps into strings.
            "DATE_PARSE" if f.args.len() >= 2
                && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];

                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto -> Presto: normalize format
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![val, Expression::string(&normalized)],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Hive => {
                        // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP);
                        // otherwise fall back to TO_TIMESTAMP with a Java-style format.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
                                || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
                                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: val,
                                    to: DataType::Timestamp { timezone: false, precision: None },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![val, Expression::string(&java_fmt)],
                                ))))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::DuckDB => {
                        // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![val, Expression::string(&duckdb_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    // Other targets keep DATE_PARSE unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
            // (Hive spells these UNBASE64 / BASE64; argument list is unchanged).
            "FROM_BASE64" if f.args.len() == 1
                && matches!(target, DialectType::Hive) => {
                Ok(Expression::Function(Box::new(Function::new("UNBASE64".to_string(), f.args))))
            }
            "TO_BASE64" if f.args.len() == 1
                && matches!(target, DialectType::Hive) => {
                Ok(Expression::Function(Box::new(Function::new("BASE64".to_string(), f.args))))
            }
            // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
            // (Presto's FROM_UNIXTIME yields a timestamp, Spark's yields a string,
            // so the CAST restores the timestamp type for Spark targets).
            "FROM_UNIXTIME" if f.args.len() == 1
                && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena)
                && matches!(target, DialectType::Spark | DialectType::Databricks) => {
                // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
                let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), f.args)));
                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                    this: from_unix,
                    to: DataType::Timestamp { timezone: false, precision: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function.
            // Targets that natively use Java-style DATE_FORMAT (Hive/Spark/Databricks/
            // MySQL/SingleStore) are excluded — they can keep the call as-is.
            "DATE_FORMAT" if f.args.len() >= 2
                && !matches!(target, DialectType::Hive | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::MySQL | DialectType::SingleStore) => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];
                // Hive-family sources use Java-style tokens; other sources (e.g. MySQL)
                // are assumed to already carry C-style tokens.
                let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);
8309
8310 fn java_to_c_format(fmt: &str) -> String {
8311 // Replace Java patterns with C strftime patterns.
8312 // Uses multi-pass to handle patterns that conflict.
8313 // First pass: replace multi-char patterns (longer first)
8314 let result = fmt
8315 .replace("yyyy", "%Y")
8316 .replace("SSSSSS", "%f")
8317 .replace("EEEE", "%W")
8318 .replace("MM", "%m")
8319 .replace("dd", "%d")
8320 .replace("HH", "%H")
8321 .replace("mm", "%M")
8322 .replace("ss", "%S")
8323 .replace("yy", "%y");
8324 // Second pass: handle single-char timezone patterns
8325 // z -> %Z (timezone name), Z -> %z (timezone offset)
8326 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
8327 let mut out = String::new();
8328 let chars: Vec<char> = result.chars().collect();
8329 let mut i = 0;
8330 while i < chars.len() {
8331 if chars[i] == '%' && i + 1 < chars.len() {
8332 // Already a format specifier, skip both chars
8333 out.push(chars[i]);
8334 out.push(chars[i + 1]);
8335 i += 2;
8336 } else if chars[i] == 'z' {
8337 out.push_str("%Z");
8338 i += 1;
8339 } else if chars[i] == 'Z' {
8340 out.push_str("%z");
8341 i += 1;
8342 } else {
8343 out.push(chars[i]);
8344 i += 1;
8345 }
8346 }
8347 out
8348 }
8349
8350 fn java_to_presto_format(fmt: &str) -> String {
8351 // Presto uses %T for HH:MM:SS
8352 let c_fmt = java_to_c_format(fmt);
8353 c_fmt.replace("%H:%M:%S", "%T")
8354 }
8355
8356 fn java_to_bq_format(fmt: &str) -> String {
8357 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
8358 let c_fmt = java_to_c_format(fmt);
8359 c_fmt.replace("%Y-%m-%d", "%F")
8360 .replace("%H:%M:%S", "%T")
8361 }
8362
                // For Hive source, CAST string literals to appropriate type so the
                // target's formatter receives a temporal value rather than a string.
                let cast_val = if is_hive_source {
                    match &val {
                        Expression::Literal(crate::expressions::Literal::String(_)) => {
                            match target {
                                DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                    Self::ensure_cast_timestamp(val.clone())
                                }
                                DialectType::BigQuery => {
                                    // BigQuery: CAST(val AS DATETIME)
                                    Expression::Cast(Box::new(crate::expressions::Cast {
                                        this: val.clone(),
                                        to: DataType::Custom { name: "DATETIME".to_string() },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                _ => val.clone(),
                            }
                        }
                        // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
                        Expression::Cast(c) if matches!(c.to, DataType::Date) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                            Expression::Cast(Box::new(crate::expressions::Cast {
                                this: val.clone(),
                                to: DataType::Timestamp { timezone: false, precision: None },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        Expression::Literal(crate::expressions::Literal::Date(_)) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                            // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
                            let cast_date = Self::date_literal_to_cast(val.clone());
                            Expression::Cast(Box::new(crate::expressions::Cast {
                                this: cast_date,
                                to: DataType::Timestamp { timezone: false, precision: None },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        _ => val.clone(),
                    }
                } else {
                    val.clone()
                };

                match target {
                    DialectType::DuckDB => {
                        // DATE_FORMAT(val, fmt) -> STRFTIME(val, c_fmt); non-Hive
                        // sources are assumed to already use C-style tokens.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let c_fmt = if is_hive_source {
                                java_to_c_format(s)
                            } else { s.clone() };
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![cast_val, Expression::string(&c_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![cast_val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Hive-family source: rewrite Java tokens into Presto tokens;
                        // otherwise leave the original arguments untouched.
                        if is_hive_source {
                            if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                                let p_fmt = java_to_presto_format(s);
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![cast_val, Expression::string(&p_fmt)],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![cast_val, fmt_expr.clone()],
                                ))))
                            }
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                f.args,
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
                        // (BigQuery takes the format string first).
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = if is_hive_source {
                                java_to_bq_format(s)
                            } else {
                                java_to_c_format(s)
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![Expression::string(&bq_fmt), cast_val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![fmt_expr.clone(), cast_val],
                            ))))
                        }
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // DATE_FORMAT(val, fmt) -> TO_CHAR(val, pg_fmt), mapping
                        // Java tokens to Postgres TO_CHAR patterns in place.
                        // NOTE(review): this branch passes `val` (uncast) where the
                        // other targets use `cast_val`, and `.replace("MM", "MM")` is
                        // a no-op — confirm both are intentional.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let pg_fmt = s
                                .replace("yyyy", "YYYY")
                                .replace("MM", "MM")
                                .replace("dd", "DD")
                                .replace("HH", "HH24")
                                .replace("mm", "MI")
                                .replace("ss", "SS")
                                .replace("yy", "YY");
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, Expression::string(&pg_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    // Other targets keep DATE_FORMAT unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // DATEDIFF(unit, start, end) - 3-arg form
            // SQLite uses DATEDIFF(date1, date2, unit_string) instead
            "DATEDIFF" if f.args.len() == 3 => {
                let mut args = f.args;
                // SQLite source: args = (date1, date2, unit_string)
                // Standard source: args = (unit, start, end)
                // The binding normalizes both layouts to (unit, start, end) plus the
                // uppercase unit string; the SQLite->SQLite case returns early below.
                let (_arg0, arg1, arg2, unit_str) = if matches!(source, DialectType::SQLite) {
                    let date1 = args.remove(0);
                    let date2 = args.remove(0);
                    let unit_expr = args.remove(0);
                    let unit_s = Self::get_unit_str_static(&unit_expr);

                    // For SQLite target, generate JULIANDAY arithmetic directly:
                    // (JULIANDAY(a) - JULIANDAY(b)) scaled per unit, CAST to INTEGER.
                    if matches!(target, DialectType::SQLite) {
                        let jd_first = Expression::Function(Box::new(Function::new(
                            "JULIANDAY".to_string(), vec![date1],
                        )));
                        let jd_second = Expression::Function(Box::new(Function::new(
                            "JULIANDAY".to_string(), vec![date2],
                        )));
                        let diff = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(jd_first, jd_second)));
                        let paren_diff = Expression::Paren(Box::new(crate::expressions::Paren {
                            this: diff, trailing_comments: Vec::new(),
                        }));
                        // Julian-day deltas are in days: multiply up for sub-day
                        // units, divide down for month/year approximations
                        // (30-day months / 365-day years by construction).
                        let adjusted = match unit_s.as_str() {
                            "HOUR" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                paren_diff, Expression::Literal(Literal::Number("24.0".to_string())),
                            ))),
                            "MINUTE" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                paren_diff, Expression::Literal(Literal::Number("1440.0".to_string())),
                            ))),
                            "SECOND" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                paren_diff, Expression::Literal(Literal::Number("86400.0".to_string())),
                            ))),
                            "MONTH" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                paren_diff, Expression::Literal(Literal::Number("30.0".to_string())),
                            ))),
                            "YEAR" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                paren_diff, Expression::Literal(Literal::Number("365.0".to_string())),
                            ))),
                            _ => paren_diff,
                        };
                        return Ok(Expression::Cast(Box::new(Cast {
                            this: adjusted,
                            to: DataType::Int { length: None, integer_spelling: true },
                            trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                        })));
                    }

                    // For other targets, remap to standard (unit, start, end) form
                    let unit_ident = Expression::Identifier(Identifier::new(&unit_s));
                    (unit_ident, date1, date2, unit_s)
                } else {
                    let arg0 = args.remove(0);
                    let arg1 = args.remove(0);
                    let arg2 = args.remove(0);
                    let unit_s = Self::get_unit_str_static(&arg0);
                    (arg0, arg1, arg2, unit_s)
                };

                // For Hive/Spark source, string literal dates need to be cast
                // Note: Databricks is excluded - it handles string args like standard SQL
                let is_hive_spark = matches!(source, DialectType::Hive | DialectType::Spark);

                match target {
                    DialectType::Snowflake => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        // Use ensure_to_date_preserved to add TO_DATE with a marker
                        // that prevents the Snowflake TO_DATE handler from converting it to CAST
                        let d1 = if is_hive_spark { Self::ensure_to_date_preserved(arg1) } else { arg1 };
                        let d2 = if is_hive_spark { Self::ensure_to_date_preserved(arg2) } else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, d1, d2],
                        ))))
                    }
                    DialectType::Redshift => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
                        let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, d1, d2],
                        ))))
                    }
                    DialectType::TSQL => {
                        // T-SQL DATEDIFF accepts strings directly: no casting needed.
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL);
                        if is_hive_spark {
                            // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                            let d1 = Self::ensure_cast_date(arg1);
                            let d2 = Self::ensure_cast_date(arg2);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![
                                    Expression::string(&unit_str),
                                    d1, d2,
                                ],
                            ))))
                        } else if matches!(source, DialectType::Snowflake) {
                            // For Snowflake source: special handling per unit
                            match unit_str.as_str() {
                                "NANOSECOND" => {
                                    // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                                    fn cast_to_timestamp_ns(expr: Expression) -> Expression {
                                        Expression::Cast(Box::new(Cast {
                                            this: expr,
                                            to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
                                            trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                                        }))
                                    }
                                    let epoch_end = Expression::Function(Box::new(Function::new(
                                        "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg2)],
                                    )));
                                    let epoch_start = Expression::Function(Box::new(Function::new(
                                        "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg1)],
                                    )));
                                    Ok(Expression::Sub(Box::new(BinaryOp::new(epoch_end, epoch_start))))
                                }
                                "WEEK" => {
                                    // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                                    // Truncating to week start matches Snowflake's
                                    // boundary-counting semantics for WEEK.
                                    let d1 = Self::force_cast_date(arg1);
                                    let d2 = Self::force_cast_date(arg2);
                                    let dt1 = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d1],
                                    )));
                                    let dt2 = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d2],
                                    )));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_DIFF".to_string(), vec![
                                            Expression::string(&unit_str),
                                            dt1, dt2,
                                        ],
                                    ))))
                                }
                                _ => {
                                    // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                                    let d1 = Self::force_cast_date(arg1);
                                    let d2 = Self::force_cast_date(arg2);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_DIFF".to_string(), vec![
                                            Expression::string(&unit_str),
                                            d1, d2,
                                        ],
                                    ))))
                                }
                            }
                        } else if is_redshift_tsql {
                            // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                            let d1 = Self::force_cast_timestamp(arg1);
                            let d2 = Self::force_cast_timestamp(arg2);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![
                                    Expression::string(&unit_str),
                                    d1, d2,
                                ],
                            ))))
                        } else {
                            // Keep as DATEDIFF so DuckDB's transform_datediff handles
                            // DATE_TRUNC for WEEK, CAST for string literals, etc.
                            let unit = Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // BigQuery DATE_DIFF takes (end, start, unit) — note the
                        // reversed operand order relative to DATEDIFF(unit, start, end).
                        let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
                        let cast_d1 = if is_hive_spark { Self::ensure_cast_date(arg1) }
                            else if is_redshift_tsql { Self::force_cast_datetime(arg1) }
                            else { Self::ensure_cast_datetime(arg1) };
                        let cast_d2 = if is_hive_spark { Self::ensure_cast_date(arg2) }
                            else if is_redshift_tsql { Self::force_cast_datetime(arg2) }
                            else { Self::ensure_cast_datetime(arg2) };
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
                        // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
                        let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
                        let d1 = if is_hive_spark { Self::double_cast_timestamp_date(arg1) }
                            else if is_redshift_tsql { Self::force_cast_timestamp(arg1) }
                            else { arg1 };
                        let d2 = if is_hive_spark { Self::double_cast_timestamp_date(arg2) }
                            else if is_redshift_tsql { Self::force_cast_timestamp(arg2) }
                            else { arg2 };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string(&unit_str),
                                d1, d2,
                            ],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive only has day-granularity DATEDIFF(end, start); other
                        // units are emulated and truncated back to INT.
                        match unit_str.as_str() {
                            "MONTH" => {
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(Box::new(Function::new(
                                        "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
                                    ))),
                                    to: DataType::Int { length: None, integer_spelling: false },
                                    trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                                })))
                            }
                            "WEEK" => {
                                // Day difference divided by 7, truncated to INT.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                        Expression::Function(Box::new(Function::new(
                                            "DATEDIFF".to_string(), vec![arg2, arg1],
                                        ))),
                                        Expression::number(7),
                                    ))),
                                    to: DataType::Int { length: None, integer_spelling: false },
                                    trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
                                })))
                            }
                            _ => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEDIFF".to_string(), vec![arg2, arg1],
                                ))))
                            }
                        }
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    _ => {
                        // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
                        let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
                        let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, d1, d2],
                        ))))
                    }
                }
            }
            // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
            // (day-granularity difference; note arg0 is the END date, arg1 the START).
            "DATEDIFF" if f.args.len() == 2 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);

                // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
                // Also recognizes TryCast/Cast to DATE that may have been produced by
                // cross-dialect TO_DATE -> TRY_CAST conversion
                let unwrap_to_date = |e: Expression| -> (Expression, bool) {
                    if let Expression::Function(ref f) = e {
                        if f.name.eq_ignore_ascii_case("TO_DATE") && f.args.len() == 1 {
                            return (f.args[0].clone(), true);
                        }
                    }
                    // Also recognize TryCast(x, Date) as an already-converted TO_DATE
                    if let Expression::TryCast(ref c) = e {
                        if matches!(c.to, DataType::Date) {
                            return (e, true); // Already properly cast, return as-is
                        }
                    }
                    (e, false)
                };

                match target {
                    DialectType::DuckDB => {
                        // For Hive source, always CAST to DATE
                        // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
                        // (cast_d0 derives from arg1/START and cast_d1 from arg0/END,
                        // matching DuckDB's DATE_DIFF('DAY', start, end) order).
                        let cast_d0 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            let (inner, was_to_date) = unwrap_to_date(arg1);
                            if was_to_date {
                                // Already a date expression, use directly
                                if matches!(&inner, Expression::TryCast(_)) {
                                    inner // Already TRY_CAST(x AS DATE)
                                } else {
                                    Self::try_cast_date(inner)
                                }
                            } else {
                                Self::force_cast_date(inner)
                            }
                        } else {
                            Self::ensure_cast_date(arg1)
                        };
                        let cast_d1 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            let (inner, was_to_date) = unwrap_to_date(arg0);
                            if was_to_date {
                                if matches!(&inner, Expression::TryCast(_)) {
                                    inner
                                } else {
                                    Self::try_cast_date(inner)
                                }
                            } else {
                                Self::force_cast_date(inner)
                            }
                        } else {
                            Self::ensure_cast_date(arg0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string("DAY"),
                                cast_d0, cast_d1,
                            ],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // For Hive/Spark source, apply double_cast_timestamp_date
                        // For other sources (MySQL etc.), just swap args without casting
                        if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                            let cast_fn = |e: Expression| -> Expression {
                                let (inner, was_to_date) = unwrap_to_date(e);
                                if was_to_date {
                                    // NOTE(review): double_cast_timestamp_date is applied
                                    // twice on the TO_DATE path — confirm the extra wrap
                                    // is intentional and not a duplicated call.
                                    let first_cast = Self::double_cast_timestamp_date(inner);
                                    Self::double_cast_timestamp_date(first_cast)
                                } else {
                                    Self::double_cast_timestamp_date(inner)
                                }
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![
                                    Expression::string("DAY"),
                                    cast_fn(arg1), cast_fn(arg0),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(), vec![
                                    Expression::string("DAY"),
                                    arg1, arg0,
                                ],
                            ))))
                        }
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEDIFF(DAY, start, end) with identifier unit.
                        let unit = Expression::Identifier(Identifier::new("DAY"));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, arg1, arg0],
                        ))))
                    }
                    _ => {
                        // Other targets keep the 2-arg (end, start) form unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![arg0, arg1],
                        ))))
                    }
                }
            }
            // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
            // Targets differ only in the function name and whether the unit is a
            // string literal or a bare identifier.
            "DATE_DIFF" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = Self::get_unit_str_static(&arg0);

                match target {
                    // NOTE(review): the DuckDB and Presto/Trino/Athena arms are
                    // identical, as are the Snowflake/Redshift and `_` arms — they
                    // could be merged with `|` patterns if kept in sync deliberately.
                    DialectType::DuckDB => {
                        // DuckDB: DATE_DIFF('UNIT', start, end)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string(&unit_str),
                                arg1, arg2,
                            ],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::string(&unit_str),
                                arg1, arg2,
                            ],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                    _ => {
                        // Default: rename to DATEDIFF with an identifier unit.
                        let unit = Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(), vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
8893 // DATEADD(unit, val, date) - 3-arg form
8894 "DATEADD" if f.args.len() == 3 => {
8895 let mut args = f.args;
8896 let arg0 = args.remove(0);
8897 let arg1 = args.remove(0);
8898 let arg2 = args.remove(0);
8899 let unit_str = Self::get_unit_str_static(&arg0);
8900
8901 // Normalize TSQL unit abbreviations to standard names
8902 let unit_str = match unit_str.as_str() {
8903 "YY" | "YYYY" => "YEAR".to_string(),
8904 "QQ" | "Q" => "QUARTER".to_string(),
8905 "MM" | "M" => "MONTH".to_string(),
8906 "WK" | "WW" => "WEEK".to_string(),
8907 "DD" | "D" | "DY" => "DAY".to_string(),
8908 "HH" => "HOUR".to_string(),
8909 "MI" | "N" => "MINUTE".to_string(),
8910 "SS" | "S" => "SECOND".to_string(),
8911 "MS" => "MILLISECOND".to_string(),
8912 "MCS" | "US" => "MICROSECOND".to_string(),
8913 _ => unit_str,
8914 };
8915 match target {
8916 DialectType::Snowflake => {
8917 let unit = Expression::Identifier(Identifier::new(&unit_str));
8918 // Cast string literal to TIMESTAMP, but not for Snowflake source
8919 // (Snowflake natively accepts string literals in DATEADD)
8920 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
8921 && !matches!(source, DialectType::Snowflake) {
8922 Expression::Cast(Box::new(Cast {
8923 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8924 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8925 }))
8926 } else { arg2 };
8927 Ok(Expression::Function(Box::new(Function::new(
8928 "DATEADD".to_string(), vec![unit, arg1, arg2],
8929 ))))
8930 }
8931 DialectType::TSQL => {
8932 let unit = Expression::Identifier(Identifier::new(&unit_str));
8933 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
8934 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
8935 && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
8936 Expression::Cast(Box::new(Cast {
8937 this: arg2, to: DataType::Custom { name: "DATETIME2".to_string() },
8938 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8939 }))
8940 } else { arg2 };
8941 Ok(Expression::Function(Box::new(Function::new(
8942 "DATEADD".to_string(), vec![unit, arg1, arg2],
8943 ))))
8944 }
8945 DialectType::Redshift => {
8946 let unit = Expression::Identifier(Identifier::new(&unit_str));
8947 Ok(Expression::Function(Box::new(Function::new(
8948 "DATEADD".to_string(), vec![unit, arg1, arg2],
8949 ))))
8950 }
8951 DialectType::Databricks => {
8952 let unit = Expression::Identifier(Identifier::new(&unit_str));
8953 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
8954 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
8955 let func_name = if matches!(source, DialectType::TSQL | DialectType::Fabric | DialectType::Databricks | DialectType::Snowflake) {
8956 "DATEADD"
8957 } else {
8958 "DATE_ADD"
8959 };
8960 Ok(Expression::Function(Box::new(Function::new(
8961 func_name.to_string(), vec![unit, arg1, arg2],
8962 ))))
8963 }
8964 DialectType::DuckDB => {
8965 // Special handling for NANOSECOND from Snowflake
8966 if unit_str == "NANOSECOND" && matches!(source, DialectType::Snowflake) {
8967 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
8968 let cast_ts = Expression::Cast(Box::new(Cast {
8969 this: arg2,
8970 to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
8971 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8972 }));
8973 let epoch_ns = Expression::Function(Box::new(Function::new(
8974 "EPOCH_NS".to_string(), vec![cast_ts],
8975 )));
8976 let sum = Expression::Add(Box::new(BinaryOp::new(epoch_ns, arg1)));
8977 Ok(Expression::Function(Box::new(Function::new(
8978 "MAKE_TIMESTAMP_NS".to_string(), vec![sum],
8979 ))))
8980 } else {
8981 // DuckDB: convert to date + INTERVAL syntax with CAST
8982 let iu = Self::parse_interval_unit_static(&unit_str);
8983 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
8984 this: Some(arg1),
8985 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
8986 }));
8987 // Cast string literal to TIMESTAMP
8988 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
8989 Expression::Cast(Box::new(Cast {
8990 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8991 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8992 }))
8993 } else { arg2 };
8994 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
8995 }
8996 }
8997 DialectType::Spark => {
8998 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
8999 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
9000 if matches!(source, DialectType::TSQL | DialectType::Fabric) {
9001 fn multiply_expr_spark(expr: Expression, factor: i64) -> Expression {
9002 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
9003 if let Ok(val) = n.parse::<i64>() {
9004 return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
9005 }
9006 }
9007 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
9008 expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
9009 )))
9010 }
9011 let normalized_unit = match unit_str.as_str() {
9012 "YEAR" | "YY" | "YYYY" => "YEAR",
9013 "QUARTER" | "QQ" | "Q" => "QUARTER",
9014 "MONTH" | "MM" | "M" => "MONTH",
9015 "WEEK" | "WK" | "WW" => "WEEK",
9016 "DAY" | "DD" | "D" | "DY" => "DAY",
9017 _ => &unit_str,
9018 };
9019 match normalized_unit {
9020 "YEAR" => {
9021 let months = multiply_expr_spark(arg1, 12);
9022 Ok(Expression::Function(Box::new(Function::new(
9023 "ADD_MONTHS".to_string(), vec![arg2, months],
9024 ))))
9025 }
9026 "QUARTER" => {
9027 let months = multiply_expr_spark(arg1, 3);
9028 Ok(Expression::Function(Box::new(Function::new(
9029 "ADD_MONTHS".to_string(), vec![arg2, months],
9030 ))))
9031 }
9032 "MONTH" => {
9033 Ok(Expression::Function(Box::new(Function::new(
9034 "ADD_MONTHS".to_string(), vec![arg2, arg1],
9035 ))))
9036 }
9037 "WEEK" => {
9038 let days = multiply_expr_spark(arg1, 7);
9039 Ok(Expression::Function(Box::new(Function::new(
9040 "DATE_ADD".to_string(), vec![arg2, days],
9041 ))))
9042 }
9043 "DAY" => {
9044 Ok(Expression::Function(Box::new(Function::new(
9045 "DATE_ADD".to_string(), vec![arg2, arg1],
9046 ))))
9047 }
9048 _ => {
9049 let unit = Expression::Identifier(Identifier::new(&unit_str));
9050 Ok(Expression::Function(Box::new(Function::new(
9051 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
9052 ))))
9053 }
9054 }
9055 } else {
9056 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
9057 let unit = Expression::Identifier(Identifier::new(&unit_str));
9058 Ok(Expression::Function(Box::new(Function::new(
9059 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
9060 ))))
9061 }
9062 }
9063 DialectType::Hive => {
9064 match unit_str.as_str() {
9065 "MONTH" => {
9066 Ok(Expression::Function(Box::new(Function::new(
9067 "ADD_MONTHS".to_string(), vec![arg2, arg1],
9068 ))))
9069 }
9070 _ => {
9071 Ok(Expression::Function(Box::new(Function::new(
9072 "DATE_ADD".to_string(), vec![arg2, arg1],
9073 ))))
9074 }
9075 }
9076 }
9077 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9078 // Cast string literal date to TIMESTAMP
9079 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
9080 Expression::Cast(Box::new(Cast {
9081 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
9082 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
9083 }))
9084 } else { arg2 };
9085 Ok(Expression::Function(Box::new(Function::new(
9086 "DATE_ADD".to_string(), vec![
9087 Expression::string(&unit_str),
9088 arg1, arg2,
9089 ],
9090 ))))
9091 }
9092 DialectType::MySQL => {
9093 let iu = Self::parse_interval_unit_static(&unit_str);
9094 Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
9095 this: arg2,
9096 interval: arg1,
9097 unit: iu,
9098 })))
9099 }
9100 DialectType::PostgreSQL => {
9101 // Cast string literal date to TIMESTAMP
9102 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
9103 Expression::Cast(Box::new(Cast {
9104 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
9105 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
9106 }))
9107 } else { arg2 };
9108 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9109 this: Some(Expression::string(&format!("{} {}", Self::expr_to_string_static(&arg1), unit_str))),
9110 unit: None,
9111 }));
9112 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
9113 }
9114 DialectType::BigQuery => {
9115 let iu = Self::parse_interval_unit_static(&unit_str);
9116 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9117 this: Some(arg1),
9118 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
9119 }));
9120 // Non-TSQL sources: CAST string literal to DATETIME
9121 let arg2 = if !matches!(source, DialectType::TSQL | DialectType::Fabric)
9122 && matches!(&arg2, Expression::Literal(Literal::String(_)))
9123 {
9124 Expression::Cast(Box::new(Cast {
9125 this: arg2, to: DataType::Custom { name: "DATETIME".to_string() },
9126 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
9127 }))
9128 } else { arg2 };
9129 Ok(Expression::Function(Box::new(Function::new(
9130 "DATE_ADD".to_string(), vec![arg2, interval],
9131 ))))
9132 }
9133 _ => {
9134 let unit = Expression::Identifier(Identifier::new(&unit_str));
9135 Ok(Expression::Function(Box::new(Function::new(
9136 "DATEADD".to_string(), vec![unit, arg1, arg2],
9137 ))))
9138 }
9139 }
9140 }
9141 // DATE_ADD(unit, val, date) - 3-arg from ClickHouse/Presto/Spark
9142 "DATE_ADD" if f.args.len() == 3 => {
9143 let mut args = f.args;
9144 let arg0 = args.remove(0);
9145 let arg1 = args.remove(0);
9146 let arg2 = args.remove(0);
9147 let unit_str = Self::get_unit_str_static(&arg0);
9148
9149 match target {
9150 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9151 Ok(Expression::Function(Box::new(Function::new(
9152 "DATE_ADD".to_string(), vec![
9153 Expression::string(&unit_str),
9154 arg1, arg2,
9155 ],
9156 ))))
9157 }
9158 DialectType::DuckDB => {
9159 let iu = Self::parse_interval_unit_static(&unit_str);
9160 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9161 this: Some(arg1),
9162 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
9163 }));
9164 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
9165 }
9166 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift => {
9167 let unit = Expression::Identifier(Identifier::new(&unit_str));
9168 Ok(Expression::Function(Box::new(Function::new(
9169 "DATEADD".to_string(), vec![unit, arg1, arg2],
9170 ))))
9171 }
9172 DialectType::Spark => {
9173 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
9174 if unit_str == "DAY" {
9175 Ok(Expression::Function(Box::new(Function::new(
9176 "DATE_ADD".to_string(), vec![arg2, arg1],
9177 ))))
9178 } else {
9179 let unit = Expression::Identifier(Identifier::new(&unit_str));
9180 Ok(Expression::Function(Box::new(Function::new(
9181 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
9182 ))))
9183 }
9184 }
9185 DialectType::Databricks => {
9186 let unit = Expression::Identifier(Identifier::new(&unit_str));
9187 Ok(Expression::Function(Box::new(Function::new(
9188 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
9189 ))))
9190 }
9191 DialectType::Hive => {
9192 // Hive: DATE_ADD(date, val) for DAY
9193 Ok(Expression::Function(Box::new(Function::new(
9194 "DATE_ADD".to_string(), vec![arg2, arg1],
9195 ))))
9196 }
9197 _ => {
9198 let unit = Expression::Identifier(Identifier::new(&unit_str));
9199 Ok(Expression::Function(Box::new(Function::new(
9200 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
9201 ))))
9202 }
9203 }
9204 }
9205 // DATE_ADD(date, days) - 2-arg Hive/Spark form (add days)
9206 "DATE_ADD" if f.args.len() == 2
9207 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
9208 let mut args = f.args;
9209 let date = args.remove(0);
9210 let days = args.remove(0);
9211 match target {
9212 DialectType::Hive | DialectType::Spark => {
9213 // Keep as DATE_ADD(date, days) for Hive/Spark
9214 Ok(Expression::Function(Box::new(Function::new(
9215 "DATE_ADD".to_string(), vec![date, days],
9216 ))))
9217 }
9218 DialectType::Databricks => {
9219 // Databricks: DATEADD(DAY, days, date)
9220 Ok(Expression::Function(Box::new(Function::new(
9221 "DATEADD".to_string(), vec![
9222 Expression::Identifier(Identifier::new("DAY")),
9223 days, date,
9224 ],
9225 ))))
9226 }
9227 DialectType::DuckDB => {
9228 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
9229 let cast_date = Self::ensure_cast_date(date);
9230 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
9231 let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
9232 Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
9233 } else { days };
9234 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9235 this: Some(interval_val),
9236 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9237 unit: crate::expressions::IntervalUnit::Day,
9238 use_plural: false,
9239 }),
9240 }));
9241 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
9242 }
9243 DialectType::Snowflake => {
9244 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
9245 let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
9246 if matches!(date, Expression::Literal(Literal::String(_))) {
9247 Self::double_cast_timestamp_date(date)
9248 } else { date }
9249 } else { date };
9250 Ok(Expression::Function(Box::new(Function::new(
9251 "DATEADD".to_string(), vec![
9252 Expression::Identifier(Identifier::new("DAY")),
9253 days, cast_date,
9254 ],
9255 ))))
9256 }
9257 DialectType::Redshift => {
9258 Ok(Expression::Function(Box::new(Function::new(
9259 "DATEADD".to_string(), vec![
9260 Expression::Identifier(Identifier::new("DAY")),
9261 days, date,
9262 ],
9263 ))))
9264 }
9265 DialectType::TSQL | DialectType::Fabric => {
9266 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
9267 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
9268 let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark) {
9269 if matches!(date, Expression::Literal(Literal::String(_))) {
9270 Self::double_cast_datetime2_date(date)
9271 } else { date }
9272 } else { date };
9273 Ok(Expression::Function(Box::new(Function::new(
9274 "DATEADD".to_string(), vec![
9275 Expression::Identifier(Identifier::new("DAY")),
9276 days, cast_date,
9277 ],
9278 ))))
9279 }
9280 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9281 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
9282 let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
9283 if matches!(date, Expression::Literal(Literal::String(_))) {
9284 Self::double_cast_timestamp_date(date)
9285 } else { date }
9286 } else { date };
9287 Ok(Expression::Function(Box::new(Function::new(
9288 "DATE_ADD".to_string(), vec![
9289 Expression::string("DAY"),
9290 days, cast_date,
9291 ],
9292 ))))
9293 }
9294 DialectType::BigQuery => {
9295 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
9296 let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
9297 Self::double_cast_datetime_date(date)
9298 } else { date };
9299 // Wrap complex expressions in Paren for interval
9300 let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
9301 Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
9302 } else { days };
9303 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9304 this: Some(interval_val),
9305 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9306 unit: crate::expressions::IntervalUnit::Day,
9307 use_plural: false,
9308 }),
9309 }));
9310 Ok(Expression::Function(Box::new(Function::new(
9311 "DATE_ADD".to_string(), vec![cast_date, interval],
9312 ))))
9313 }
9314 DialectType::MySQL => {
9315 let iu = crate::expressions::IntervalUnit::Day;
9316 Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
9317 this: date,
9318 interval: days,
9319 unit: iu,
9320 })))
9321 }
9322 DialectType::PostgreSQL => {
9323 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9324 this: Some(Expression::string(&format!("{} DAY", Self::expr_to_string_static(&days)))),
9325 unit: None,
9326 }));
9327 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
9328 }
9329 _ => {
9330 Ok(Expression::Function(Box::new(Function::new(
9331 "DATE_ADD".to_string(), vec![date, days],
9332 ))))
9333 }
9334 }
9335 }
9336 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
9337 "DATE_SUB" if f.args.len() == 2
9338 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
9339 let mut args = f.args;
9340 let date = args.remove(0);
9341 let days = args.remove(0);
9342 // Helper to create days * -1
9343 let make_neg_days = |d: Expression| -> Expression {
9344 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
9345 d,
9346 Expression::Literal(Literal::Number("-1".to_string())),
9347 )))
9348 };
9349 let is_string_literal = matches!(date, Expression::Literal(Literal::String(_)));
9350 match target {
9351 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
9352 // Keep as DATE_SUB(date, days) for Hive/Spark
9353 Ok(Expression::Function(Box::new(Function::new(
9354 "DATE_SUB".to_string(), vec![date, days],
9355 ))))
9356 }
9357 DialectType::DuckDB => {
9358 let cast_date = Self::ensure_cast_date(date);
9359 let neg = make_neg_days(days);
9360 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9361 this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
9362 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9363 unit: crate::expressions::IntervalUnit::Day,
9364 use_plural: false,
9365 }),
9366 }));
9367 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
9368 }
9369 DialectType::Snowflake => {
9370 let cast_date = if is_string_literal {
9371 Self::double_cast_timestamp_date(date)
9372 } else { date };
9373 let neg = make_neg_days(days);
9374 Ok(Expression::Function(Box::new(Function::new(
9375 "DATEADD".to_string(), vec![
9376 Expression::Identifier(Identifier::new("DAY")),
9377 neg, cast_date,
9378 ],
9379 ))))
9380 }
9381 DialectType::Redshift => {
9382 let neg = make_neg_days(days);
9383 Ok(Expression::Function(Box::new(Function::new(
9384 "DATEADD".to_string(), vec![
9385 Expression::Identifier(Identifier::new("DAY")),
9386 neg, date,
9387 ],
9388 ))))
9389 }
9390 DialectType::TSQL | DialectType::Fabric => {
9391 let cast_date = if is_string_literal {
9392 Self::double_cast_datetime2_date(date)
9393 } else { date };
9394 let neg = make_neg_days(days);
9395 Ok(Expression::Function(Box::new(Function::new(
9396 "DATEADD".to_string(), vec![
9397 Expression::Identifier(Identifier::new("DAY")),
9398 neg, cast_date,
9399 ],
9400 ))))
9401 }
9402 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9403 let cast_date = if is_string_literal {
9404 Self::double_cast_timestamp_date(date)
9405 } else { date };
9406 let neg = make_neg_days(days);
9407 Ok(Expression::Function(Box::new(Function::new(
9408 "DATE_ADD".to_string(), vec![
9409 Expression::string("DAY"),
9410 neg, cast_date,
9411 ],
9412 ))))
9413 }
9414 DialectType::BigQuery => {
9415 let cast_date = if is_string_literal {
9416 Self::double_cast_datetime_date(date)
9417 } else { date };
9418 let neg = make_neg_days(days);
9419 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9420 this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
9421 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9422 unit: crate::expressions::IntervalUnit::Day,
9423 use_plural: false,
9424 }),
9425 }));
9426 Ok(Expression::Function(Box::new(Function::new(
9427 "DATE_ADD".to_string(), vec![cast_date, interval],
9428 ))))
9429 }
9430 _ => {
9431 Ok(Expression::Function(Box::new(Function::new(
9432 "DATE_SUB".to_string(), vec![date, days],
9433 ))))
9434 }
9435 }
9436 }
9437 // ADD_MONTHS(date, val) -> target-specific
9438 "ADD_MONTHS" if f.args.len() == 2 => {
9439 let mut args = f.args;
9440 let date = args.remove(0);
9441 let val = args.remove(0);
9442 match target {
9443 DialectType::TSQL => {
9444 let cast_date = Self::ensure_cast_datetime2(date);
9445 Ok(Expression::Function(Box::new(Function::new(
9446 "DATEADD".to_string(), vec![
9447 Expression::Identifier(Identifier::new("MONTH")),
9448 val, cast_date,
9449 ],
9450 ))))
9451 }
9452 DialectType::DuckDB => {
9453 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9454 this: Some(val),
9455 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9456 unit: crate::expressions::IntervalUnit::Month,
9457 use_plural: false,
9458 }),
9459 }));
9460 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
9461 }
9462 DialectType::Snowflake => {
9463 // Keep ADD_MONTHS when source is Snowflake
9464 if matches!(source, DialectType::Snowflake) {
9465 Ok(Expression::Function(Box::new(Function::new(
9466 "ADD_MONTHS".to_string(), vec![date, val],
9467 ))))
9468 } else {
9469 Ok(Expression::Function(Box::new(Function::new(
9470 "DATEADD".to_string(), vec![
9471 Expression::Identifier(Identifier::new("MONTH")),
9472 val, date,
9473 ],
9474 ))))
9475 }
9476 }
9477 DialectType::Redshift => {
9478 Ok(Expression::Function(Box::new(Function::new(
9479 "DATEADD".to_string(), vec![
9480 Expression::Identifier(Identifier::new("MONTH")),
9481 val, date,
9482 ],
9483 ))))
9484 }
9485 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9486 Ok(Expression::Function(Box::new(Function::new(
9487 "DATE_ADD".to_string(), vec![
9488 Expression::string("MONTH"),
9489 val, date,
9490 ],
9491 ))))
9492 }
9493 DialectType::BigQuery => {
9494 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9495 this: Some(val),
9496 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9497 unit: crate::expressions::IntervalUnit::Month,
9498 use_plural: false,
9499 }),
9500 }));
9501 Ok(Expression::Function(Box::new(Function::new(
9502 "DATE_ADD".to_string(), vec![date, interval],
9503 ))))
9504 }
9505 _ => {
9506 Ok(Expression::Function(Box::new(Function::new(
9507 "ADD_MONTHS".to_string(), vec![date, val],
9508 ))))
9509 }
9510 }
9511 }
9512 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
9513 "DATETRUNC" if f.args.len() == 2 => {
9514 let mut args = f.args;
9515 let arg0 = args.remove(0);
9516 let arg1 = args.remove(0);
9517 let unit_str = Self::get_unit_str_static(&arg0);
9518 match target {
9519 DialectType::TSQL | DialectType::Fabric => {
9520 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
9521 Ok(Expression::Function(Box::new(Function::new(
9522 "DATETRUNC".to_string(), vec![
9523 Expression::Identifier(Identifier::new(&unit_str)),
9524 arg1,
9525 ],
9526 ))))
9527 }
9528 DialectType::DuckDB => {
9529 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
9530 let date = Self::ensure_cast_timestamp(arg1);
9531 Ok(Expression::Function(Box::new(Function::new(
9532 "DATE_TRUNC".to_string(), vec![
9533 Expression::string(&unit_str),
9534 date,
9535 ],
9536 ))))
9537 }
9538 DialectType::ClickHouse => {
9539 // ClickHouse: dateTrunc('UNIT', expr)
9540 Ok(Expression::Function(Box::new(Function::new(
9541 "dateTrunc".to_string(), vec![
9542 Expression::string(&unit_str),
9543 arg1,
9544 ],
9545 ))))
9546 }
9547 _ => {
9548 // Standard: DATE_TRUNC('UNIT', expr)
9549 let unit = Expression::string(&unit_str);
9550 Ok(Expression::Function(Box::new(Function::new(
9551 "DATE_TRUNC".to_string(), vec![unit, arg1],
9552 ))))
9553 }
9554 }
9555 }
9556 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
9557 "GETDATE" if f.args.is_empty() => {
9558 match target {
9559 DialectType::TSQL => Ok(Expression::Function(f)),
9560 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new("GETDATE".to_string(), vec![])))),
9561 _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
9562 precision: None,
9563 sysdate: false,
9564 })),
9565 }
9566 }
9567 // TO_HEX(x) / HEX(x) -> target-specific hex function
9568 "TO_HEX" | "HEX" if f.args.len() == 1 => {
9569 let name = match target {
9570 DialectType::Presto | DialectType::Trino => "TO_HEX",
9571 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "HEX",
9572 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift => "TO_HEX",
9573 _ => &f.name,
9574 };
9575 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9576 }
9577 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
9578 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
9579 match target {
9580 DialectType::BigQuery => {
9581 // BigQuery: UNHEX(x) -> FROM_HEX(x)
9582 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
9583 // because BigQuery MD5 returns BYTES, not hex string
9584 let arg = &f.args[0];
9585 let wrapped_arg = match arg {
9586 Expression::Function(inner_f) if inner_f.name.to_uppercase() == "MD5"
9587 || inner_f.name.to_uppercase() == "SHA1"
9588 || inner_f.name.to_uppercase() == "SHA256"
9589 || inner_f.name.to_uppercase() == "SHA512" => {
9590 // Wrap hash function in TO_HEX for BigQuery
9591 Expression::Function(Box::new(Function::new(
9592 "TO_HEX".to_string(), vec![arg.clone()],
9593 )))
9594 }
9595 _ => f.args.into_iter().next().unwrap(),
9596 };
9597 Ok(Expression::Function(Box::new(Function::new("FROM_HEX".to_string(), vec![wrapped_arg]))))
9598 }
9599 _ => {
9600 let name = match target {
9601 DialectType::Presto | DialectType::Trino => "FROM_HEX",
9602 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNHEX",
9603 _ => &f.name,
9604 };
9605 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9606 }
9607 }
9608 }
9609 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
9610 "TO_UTF8" if f.args.len() == 1 => {
9611 match target {
9612 DialectType::Spark | DialectType::Databricks => {
9613 let mut args = f.args;
9614 args.push(Expression::string("utf-8"));
9615 Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
9616 }
9617 _ => Ok(Expression::Function(f)),
9618 }
9619 }
9620 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
9621 "FROM_UTF8" if f.args.len() == 1 => {
9622 match target {
9623 DialectType::Spark | DialectType::Databricks => {
9624 let mut args = f.args;
9625 args.push(Expression::string("utf-8"));
9626 Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
9627 }
9628 _ => Ok(Expression::Function(f)),
9629 }
9630 }
9631 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
9632 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
9633 let name = match target {
9634 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
9635 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
9636 DialectType::PostgreSQL | DialectType::Redshift => "STARTS_WITH",
9637 _ => &f.name,
9638 };
9639 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9640 }
9641 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
9642 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
9643 let name = match target {
9644 DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_DISTINCT",
9645 _ => "APPROX_COUNT_DISTINCT",
9646 };
9647 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
9648 }
9649 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
9650 "JSON_EXTRACT" if f.args.len() == 2
9651 && !matches!(source, DialectType::BigQuery)
9652 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
9653 Ok(Expression::Function(Box::new(Function::new("GET_JSON_OBJECT".to_string(), f.args))))
9654 }
9655 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
9656 "JSON_EXTRACT" if f.args.len() == 2
9657 && matches!(target, DialectType::SQLite) => {
9658 let mut args = f.args;
9659 let path = args.remove(1);
9660 let this = args.remove(0);
9661 Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
9662 this,
9663 path,
9664 returning: None,
9665 arrow_syntax: true,
9666 hash_arrow_syntax: false,
9667 wrapper_option: None,
9668 quotes_option: None,
9669 on_scalar_string: false,
9670 on_error: None,
9671 })))
9672 }
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // The literal is wrapped in [...] so SCHEMA_OF_JSON sees an array,
            // then the REGEXP_EXTRACT strips the added brackets back off.
            if matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                if let Some(Expression::ParseJson(pj)) = f.args.first() {
                    if let Expression::Literal(Literal::String(s)) = &pj.this {
                        let wrapped = Expression::Literal(Literal::String(format!("[{}]", s)));
                        let schema_of_json = Expression::Function(Box::new(Function::new(
                            "SCHEMA_OF_JSON".to_string(),
                            vec![wrapped.clone()],
                        )));
                        let from_json = Expression::Function(Box::new(Function::new(
                            "FROM_JSON".to_string(),
                            vec![wrapped, schema_of_json],
                        )));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![from_json],
                        )));
                        // Early return: the generic Spark path below must not run.
                        return Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT".to_string(),
                            vec![
                                to_json,
                                Expression::Literal(Literal::String("^.(.*).$".to_string())),
                                Expression::Literal(Literal::Number("1".to_string())),
                            ],
                        ))));
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            // so we avoid emitting a nested TO_JSON(TO_JSON(x)).
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) = args.first() {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON") && inner_f.args.len() == 1 {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        DialectType::BigQuery => {
            Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), f.args))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), f.args)));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // All other targets keep JSON_FORMAT untouched.
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle and Redshift support bare SYSDATE natively; pass through.
        DialectType::Oracle | DialectType::Redshift => Ok(Expression::Function(f)),
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                }),
                zone: Expression::Literal(Literal::String("UTC".to_string())),
            })))
        }
        // Everything else becomes CURRENT_TIMESTAMP. `sysdate: true` records
        // the original spelling; NOTE(review): how generators use that flag is
        // defined elsewhere — confirm before relying on it.
        _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: true,
        })),
    }
}
// LOGICAL_OR(x) -> BOOL_OR(x)
// Non-Spark targets keep the original name (rebuilt via Function::new).
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// LOGICAL_AND(x) -> BOOL_AND(x)
// Mirrors the LOGICAL_OR arm above.
"LOGICAL_AND" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_AND",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
// Same argument order; only the function name changes.
"MONTHS_ADD" if f.args.len() == 2 => {
    match target {
        DialectType::Oracle => {
            Ok(Expression::Function(Box::new(Function::new("ADD_MONTHS".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
"ARRAY_JOIN" if f.args.len() >= 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
            Ok(Expression::Function(f))
        }
        DialectType::Hive => {
            // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
            // Note the argument swap: CONCAT_WS takes the separator first.
            let mut args = f.args;
            let arr = args.remove(0);
            let sep = args.remove(0);
            // Drop any remaining args (null_replacement)
            Ok(Expression::Function(Box::new(Function::new("CONCAT_WS".to_string(), vec![sep, arr]))))
        }
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LOCATE(substr, str, pos) 3-arg -> target-specific
// For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
// Expansion: search only the suffix starting at `pos`, then map the relative
// hit back to an absolute 1-based position; 0 still means "not found".
"LOCATE" if f.args.len() == 3 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
    let mut args = f.args;
    let substr = args.remove(0);
    let string = args.remove(0);
    let pos = args.remove(0);
    // STRPOS(SUBSTRING(string, pos), substr)
    let substring_call = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
    )));
    let strpos_call = Expression::Function(Box::new(Function::new(
        "STRPOS".to_string(), vec![substring_call, substr.clone()],
    )));
    // STRPOS(...) + pos - 1  (relative hit -> absolute position)
    let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
        Expression::Add(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            pos.clone(),
        ))),
        Expression::number(1),
    )));
    // STRPOS(...) = 0  ("not found" guard; the call is duplicated in the
    // output SQL, so the engine evaluates the search twice)
    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
        strpos_call.clone(),
        Expression::number(0),
    )));

    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
            Ok(Expression::Function(Box::new(Function::new(
                "IF".to_string(),
                vec![is_zero, Expression::number(0), pos_adjusted],
            ))))
        }
        DialectType::DuckDB => {
            // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![
                    (is_zero, Expression::number(0)),
                ],
                else_: Some(pos_adjusted),
            })))
        }
        // Unreachable given the arm guard, but kept as a safe fallback.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LOCATE".to_string(), vec![substr, string, pos],
        )))),
    }
}
// STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
// INSTR's third argument (start position) is pinned to 1; only the
// occurrence count is forwarded.
"STRPOS" if f.args.len() == 3
    && matches!(target, DialectType::BigQuery | DialectType::Oracle | DialectType::Teradata) => {
    let mut args = f.args;
    let haystack = args.remove(0);
    let needle = args.remove(0);
    let occurrence = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "INSTR".to_string(),
        vec![haystack, needle, Expression::number(1), occurrence],
    ))))
}
// SCHEMA_NAME(id) -> target-specific
// Guard: only 0 or 1 args; calls with more args fall through to the
// unguarded SCHEMA_NAME arm later in this match.
"SCHEMA_NAME" if f.args.len() <= 1 => {
    match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // Any id argument is dropped: MySQL's SCHEMA() takes no args.
            Ok(Expression::Function(Box::new(Function::new("SCHEMA".to_string(), vec![]))))
        }
        DialectType::PostgreSQL => {
            Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
        }
        DialectType::SQLite => {
            // SQLite's primary database is always named 'main'.
            Ok(Expression::string("main"))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
"STRTOL" if f.args.len() == 2 => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(Box::new(Function::new("FROM_BASE".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
"EDITDIST3" if f.args.len() == 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new("LEVENSHTEIN".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
"FORMAT" if f.args.len() == 2
    && matches!(source, DialectType::MySQL | DialectType::SingleStore)
    && matches!(target, DialectType::DuckDB) => {
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count; non-literal decimal expressions silently
    // fall back to 0 decimal places.
    let dec_count = match &decimals_expr {
        Expression::Literal(Literal::Number(n)) => n.clone(),
        _ => "0".to_string(),
    };
    // DuckDB fmt-style spec: '{:,.Nf}' = thousands separators, N decimals.
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
// FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
"FORMAT" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
    let val_expr = f.args[0].clone();
    let fmt_expr = f.args[1].clone();
    // Expand unambiguous .NET single-char date format shortcodes to full patterns.
    // Only expand shortcodes that are NOT also valid numeric format specifiers.
    // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
    // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
    let (expanded_fmt, is_shortcode) = match &fmt_expr {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            match s.as_str() {
                "m" | "M" => (Expression::string("MMMM d"), true),
                "t" => (Expression::string("h:mm tt"), true),
                "T" => (Expression::string("h:mm:ss tt"), true),
                "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                _ => (fmt_expr.clone(), false),
            }
        }
        _ => (fmt_expr.clone(), false),
    };
    // Check if the format looks like a date format
    // (heuristic substring scan of the literal; non-literals are treated
    // as numeric formats).
    let is_date_format = is_shortcode || match &expanded_fmt {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
            s.contains("yyyy") || s.contains("YYYY") || s.contains("MM")
                || s.contains("dd") || s.contains("MMMM") || s.contains("HH")
                || s.contains("hh") || s.contains("ss")
        }
        _ => false,
    };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Dates render via DATE_FORMAT, numbers via FORMAT_NUMBER.
            let func_name = if is_date_format {
                "DATE_FORMAT"
            } else {
                "FORMAT_NUMBER"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(), vec![val_expr, expanded_fmt],
            ))))
        }
        _ => {
            // For TSQL and other targets, expand shortcodes but keep FORMAT
            if is_shortcode {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(), vec![val_expr, expanded_fmt],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
    }
}
// FORMAT('%s', x) from Trino/Presto -> target-specific
"FORMAT" if f.args.len() >= 2
    && matches!(source, DialectType::Trino | DialectType::Presto | DialectType::Athena) => {
    let fmt_expr = f.args[0].clone();
    let value_args: Vec<Expression> = f.args[1..].to_vec();
    match target {
        // DuckDB: replace %s with {} in format string
        DialectType::DuckDB => {
            // Only string literals are rewritten; dynamic format
            // expressions pass through unchanged.
            let new_fmt = match &fmt_expr {
                Expression::Literal(Literal::String(s)) => {
                    Expression::Literal(Literal::String(s.replace("%s", "{}")))
                }
                _ => fmt_expr,
            };
            let mut args = vec![new_fmt];
            args.extend(value_args);
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT".to_string(), args,
            ))))
        }
        // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
        DialectType::Snowflake => {
            match &fmt_expr {
                Expression::Literal(Literal::String(s)) if s == "%s" && value_args.len() == 1 => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_CHAR".to_string(), value_args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // Default: keep FORMAT as-is
        _ => Ok(Expression::Function(f)),
    }
}
10014 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
10015 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" if f.args.len() == 2 => {
10016 match target {
10017 DialectType::PostgreSQL | DialectType::Redshift => {
10018 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
10019 let arr = f.args[0].clone();
10020 let needle = f.args[1].clone();
10021 // Convert [] to ARRAY[] for PostgreSQL
10022 let pg_arr = match arr {
10023 Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
10024 expressions: a.expressions,
10025 bracket_notation: false,
10026 use_list_keyword: false,
10027 })),
10028 _ => arr,
10029 };
10030 // needle = ANY(arr) using the Any quantified expression
10031 let any_expr = Expression::Any(Box::new(crate::expressions::QuantifiedExpr {
10032 this: needle.clone(),
10033 subquery: pg_arr,
10034 op: Some(crate::expressions::QuantifiedOp::Eq),
10035 }));
10036 let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
10037 expressions: vec![any_expr, Expression::Boolean(crate::expressions::BooleanLiteral { value: false })],
10038 original_name: None,
10039 }));
10040 let is_null_check = Expression::IsNull(Box::new(crate::expressions::IsNull {
10041 this: needle,
10042 not: false,
10043 postfix_form: false,
10044 }));
10045 Ok(Expression::Case(Box::new(Case {
10046 operand: None,
10047 whens: vec![(is_null_check, Expression::Null(crate::expressions::Null))],
10048 else_: Some(coalesce),
10049 })))
10050 }
10051 _ => {
10052 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
10053 }
10054 }
10055 }
// LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
"LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
    match target {
        DialectType::PostgreSQL | DialectType::Redshift => {
            // arr1 && arr2 with ARRAY[] syntax
            // (bracket literals are rewritten to ARRAY constructors because
            // PostgreSQL has no bare [..] array literal)
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            let pg_arr1 = match arr1 {
                Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                    expressions: a.expressions,
                    bracket_notation: false,
                    use_list_keyword: false,
                })),
                _ => arr1,
            };
            let pg_arr2 = match arr2 {
                Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                    expressions: a.expressions,
                    bracket_notation: false,
                    use_list_keyword: false,
                })),
                _ => arr2,
            };
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(pg_arr1, pg_arr2))))
        }
        DialectType::DuckDB => {
            // DuckDB: arr1 && arr2 (native support)
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(arr1, arr2))))
        }
        // Other targets normalize both aliases to LIST_HAS_ANY.
        _ => Ok(Expression::Function(Box::new(Function::new("LIST_HAS_ANY".to_string(), f.args)))),
    }
}
// APPROX_QUANTILE(x, q) -> target-specific
"APPROX_QUANTILE" if f.args.len() == 2 => {
    match target {
        DialectType::Snowflake => {
            Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), f.args))))
        }
        // DuckDB branch is identical to the fallback; kept separate to
        // document that DuckDB supports APPROX_QUANTILE natively.
        DialectType::DuckDB => {
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
"MAKE_DATE" if f.args.len() == 3 => {
    match target {
        DialectType::BigQuery => {
            Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// RANGE(start, end[, step]) -> target-specific
// DuckDB is excluded by the guard: RANGE is DuckDB's own spelling.
"RANGE" if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) => {
    let start = f.args[0].clone();
    let end = f.args[1].clone();
    let step = f.args.get(2).cloned();
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // RANGE(start, end) -> SEQUENCE(start, end-1)
            // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
            // RANGE(start, start) -> ARRAY() (empty)
            // RANGE(start, end, 0) -> ARRAY() (empty)
            // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
            // (RANGE is end-exclusive, SEQUENCE is end-inclusive — hence
            //  all the end-adjustment below.)

            // Check for constant args
            // Helper: read an i64 out of a numeric literal or a negated one.
            fn extract_i64(e: &Expression) -> Option<i64> {
                match e {
                    Expression::Literal(Literal::Number(n)) => n.parse::<i64>().ok(),
                    Expression::Neg(u) => {
                        if let Expression::Literal(Literal::Number(n)) = &u.this {
                            n.parse::<i64>().ok().map(|v| -v)
                        } else { None }
                    }
                    _ => None,
                }
            }
            let start_val = extract_i64(&start);
            let end_val = extract_i64(&end);
            let step_val = step.as_ref().and_then(|s| extract_i64(s));

            // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
            if step_val == Some(0) {
                return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
            }
            if let (Some(s), Some(e_val)) = (start_val, end_val) {
                if s == e_val {
                    return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
                }
            }

            if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
                // All constants - compute new end = end - step (if step provided) or end - 1
                match step_val {
                    Some(st) if st < 0 => {
                        // Negative step: SEQUENCE(start, end - step, step)
                        let new_end = e_val - st; // end - step (= end + |step|)
                        let mut args = vec![start, Expression::number(new_end)];
                        if let Some(s) = step { args.push(s); }
                        Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
                    }
                    // NOTE(review): this positive-step branch computes the same
                    // `e_val - st` as the negative branch; they are kept split
                    // to mirror the documented cases above.
                    Some(st) => {
                        let new_end = e_val - st;
                        let mut args = vec![start, Expression::number(new_end)];
                        if let Some(s) = step { args.push(s); }
                        Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
                    }
                    None => {
                        // No step: SEQUENCE(start, end - 1)
                        let new_end = e_val - 1;
                        Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), vec![start, Expression::number(new_end)]))))
                    }
                }
            } else {
                // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
                let end_m1 = Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))));
                let cond = Expression::Lte(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(Paren { this: end_m1.clone(), trailing_comments: Vec::new() })),
                    start.clone(),
                )));
                let empty = Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![])));
                let mut seq_args = vec![start, Expression::Paren(Box::new(Paren { this: end_m1, trailing_comments: Vec::new() }))];
                if let Some(s) = step { seq_args.push(s); }
                let seq = Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), seq_args)));
                Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                    condition: cond,
                    true_value: empty,
                    false_value: Some(seq),
                    original_name: None,
                })))
            }
        }
        DialectType::SQLite => {
            // RANGE(start, end) -> GENERATE_SERIES(start, end)
            // The subquery wrapping is handled at the Alias level
            let mut args = vec![start, end];
            if let Some(s) = step { args.push(s); }
            Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ARRAY_REVERSE_SORT -> target-specific
// (handled above as well, but also need DuckDB self-normalization)
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => {
    match target {
        DialectType::Snowflake => {
            Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
        }
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
        }
        _ => {
            Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
        }
    }
}
// VARIANCE(x) -> varSamp(x) for ClickHouse
// (ClickHouse aggregate names are camelCase)
"VARIANCE" if f.args.len() == 1 => {
    match target {
        DialectType::ClickHouse => {
            Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// STDDEV(x) -> stddevSamp(x) for ClickHouse
"STDDEV" if f.args.len() == 1 => {
    match target {
        DialectType::ClickHouse => {
            Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ISINF(x) -> IS_INF(x) for BigQuery
"ISINF" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            Ok(Expression::Function(Box::new(Function::new("IS_INF".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
"CONTAINS" if f.args.len() == 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
// NOTE(review): this arm is shadowed by the earlier
// `"LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" if f.args.len() == 2`
// arm, which has the same guard — so this code is unreachable as written.
"ARRAY_CONTAINS" if f.args.len() == 2 => {
    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
        }
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => {
    match target {
        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_UNIXTIME(x) -> target-specific
"FROM_UNIXTIME" if f.args.len() == 1 => {
    match target {
        DialectType::Hive | DialectType::Spark | DialectType::Databricks
        | DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(f))
        }
        // NOTE(review): the DuckDB and PostgreSQL branches below are
        // identical and could be merged into one pattern.
        DialectType::DuckDB => {
            // DuckDB: TO_TIMESTAMP(x)
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
        }
        DialectType::PostgreSQL => {
            // PG: TO_TIMESTAMP(col)
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
        }
        DialectType::Redshift => {
            // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
            let arg = f.args.into_iter().next().unwrap();
            let epoch_ts = Expression::Literal(Literal::Timestamp("epoch".to_string()));
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::string("1 SECOND")),
                unit: None,
            }));
            let mul = Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
            let add = Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
            // Parenthesized so precedence survives in generated SQL.
            Ok(Expression::Paren(Box::new(crate::expressions::Paren { this: add, trailing_comments: Vec::new() })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
"FROM_UNIXTIME" if f.args.len() == 2
    && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![unix_ts])));
            // Literal formats are translated Hive->C; dynamic formats are
            // passed through untranslated (best effort).
            if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                let c_fmt = Self::hive_format_to_c_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(), vec![to_ts, Expression::string(&c_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(), vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![unix_ts])));
            if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                let p_fmt = Self::hive_format_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(), vec![from_unix, Expression::string(&p_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(), vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(), vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
// DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
"DATEPART" | "DATE_PART" if f.args.len() == 2 => {
    // Normalized (uppercased) unit for matching the TSQL abbreviations.
    let unit_str = Self::get_unit_str_static(&f.args[0]);
    // Get the raw unit text preserving original case
    let raw_unit = match &f.args[0] {
        Expression::Identifier(id) => id.name.clone(),
        Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
        Expression::Column(col) => col.name.name.clone(),
        _ => unit_str.clone(),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Preserve original case of unit for TSQL
            // TSQL short forms are expanded to full unit names.
            let unit_name = match unit_str.as_str() {
                "YY" | "YYYY" => "YEAR".to_string(),
                "QQ" | "Q" => "QUARTER".to_string(),
                "MM" | "M" => "MONTH".to_string(),
                "WK" | "WW" => "WEEK".to_string(),
                "DD" | "D" | "DY" => "DAY".to_string(),
                "HH" => "HOUR".to_string(),
                "MI" | "N" => "MINUTE".to_string(),
                "SS" | "S" => "SECOND".to_string(),
                _ => raw_unit.clone(), // preserve original case
            };
            let mut args = f.args;
            args[0] = Expression::Identifier(Identifier::new(&unit_name));
            Ok(Expression::Function(Box::new(Function::new("DATEPART".to_string(), args))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
            // Preserve original case for non-abbreviation units
            let unit = match unit_str.as_str() {
                "YY" | "YYYY" => "YEAR".to_string(),
                "QQ" | "Q" => "QUARTER".to_string(),
                "MM" | "M" => "MONTH".to_string(),
                "WK" | "WW" => "WEEK".to_string(),
                "DD" | "D" | "DY" => "DAY".to_string(),
                "HH" => "HOUR".to_string(),
                "MI" | "N" => "MINUTE".to_string(),
                "SS" | "S" => "SECOND".to_string(),
                _ => raw_unit, // preserve original case
            };
            Ok(Expression::Extract(Box::new(crate::expressions::ExtractFunc {
                this: f.args[1].clone(),
                field: crate::expressions::DateTimeField::Custom(unit),
            })))
        }
        // Other targets normalize the name to DATE_PART, unit untouched.
        _ => {
            Ok(Expression::Function(Box::new(Function::new("DATE_PART".to_string(), f.args))))
        }
    }
}
// DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
// DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
// DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
// DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
// NOTE(review): unlike the DATEPART arm, the TSQL branches here do not
// include DialectType::Fabric — confirm whether that is intentional.
"DATENAME" if f.args.len() == 2 => {
    let unit_str = Self::get_unit_str_static(&f.args[0]);
    let date_expr = f.args[1].clone();
    match unit_str.as_str() {
        "MM" | "M" | "MONTH" => {
            match target {
                DialectType::TSQL => {
                    let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom { name: "DATETIME2".to_string() },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks => {
                    let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp { timezone: false, precision: None },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        "DW" | "WEEKDAY" => {
            match target {
                DialectType::TSQL => {
                    let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom { name: "DATETIME2".to_string() },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT".to_string(), vec![cast_date, Expression::string("dddd")],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks => {
                    let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp { timezone: false, precision: None },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    // Spark's day-name pattern is EEEE (vs .NET's dddd).
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("EEEE")],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // Units other than month / day-of-week pass through unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
"STRING_AGG" if f.args.len() >= 2 => {
    let x = f.args[0].clone();
    let sep = f.args[1].clone();
    match target {
        DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
            Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
            })))
        }
        DialectType::SQLite => {
            // Same GroupConcat node; SQLite spells it GROUP_CONCAT too.
            Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
            })))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                this: x, separator: Some(sep), order_by: None, distinct: false, filter: None, limit: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
// Struct-update syntax keeps every other Function field (args etc.) intact.
"JSON_ARRAYAGG" => {
    match target {
        DialectType::PostgreSQL => {
            Ok(Expression::Function(Box::new(Function { name: "JSON_AGG".to_string(), ..(*f) })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
// Only reachable for calls with 2+ args: the earlier SCHEMA_NAME arm has
// guard `f.args.len() <= 1` and handles the common cases (incl. MySQL).
"SCHEMA_NAME" => {
    match target {
        DialectType::PostgreSQL => {
            Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
        }
        DialectType::SQLite => {
            Ok(Expression::string("main"))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
// Target is DuckDB only: the call becomes STRPTIME with a C-style format.
"TO_TIMESTAMP" if f.args.len() == 2
    && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
    && matches!(target, DialectType::DuckDB) => {
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    // Only literal formats can be translated; dynamic ones pass through.
    if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
        // Convert Java/Spark format to C strptime format
// Convert a Java/Spark datetime format string (e.g. "yyyy-MM-dd HH:mm:ss")
// to a C strptime-style format (e.g. "%Y-%m-%d %H:%M:%S") for DuckDB STRPTIME.
//
// Multi-letter tokens are rewritten first, ordered so that no earlier
// replacement corrupts a later one ("yyyy" before "yy"; "MM" before "mm",
// whose output "%m"/"%M" contains only a single letter and so cannot be
// re-matched). Timezone letters are handled afterwards in a character scan:
// Java `z` (zone name) -> `%Z`, Java `Z` (offset) -> `%z`. In Java, a run of
// the same letter ("zz", "zzz", "zzzz") is still ONE token, so a run is
// collapsed into a single specifier instead of one per character (the old
// per-character emission produced invalid formats like "%Z%Z%Z").
fn java_to_c_fmt(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(result.len());
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Already-converted "%X" specifier: copy verbatim so its letter
            // is not mistaken for a Java token.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' || chars[i] == 'Z' {
            let c = chars[i];
            out.push_str(if c == 'z' { "%Z" } else { "%z" });
            // Collapse the whole run: "zzz" is a single Java token.
            while i < chars.len() && chars[i] == c {
                i += 1;
            }
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
10561 let c_fmt = java_to_c_fmt(s);
10562 Ok(Expression::Function(Box::new(Function::new(
10563 "STRPTIME".to_string(),
10564 vec![val, Expression::string(&c_fmt)],
10565 ))))
10566 } else {
10567 Ok(Expression::Function(Box::new(Function::new(
10568 "STRPTIME".to_string(),
10569 vec![val, fmt_expr],
10570 ))))
10571 }
10572 }
10573 // TO_DATE(x) 1-arg from Doris: date conversion
10574 "TO_DATE" if f.args.len() == 1
10575 && matches!(source, DialectType::Doris | DialectType::StarRocks) => {
10576 let arg = f.args.into_iter().next().unwrap();
10577 match target {
10578 DialectType::Oracle | DialectType::DuckDB | DialectType::TSQL => {
10579 // CAST(x AS DATE)
10580 Ok(Expression::Cast(Box::new(Cast {
10581 this: arg,
10582 to: DataType::Date,
10583 double_colon_syntax: false,
10584 trailing_comments: vec![],
10585 format: None,
10586 default: None,
10587 })))
10588 }
10589 DialectType::MySQL | DialectType::SingleStore => {
10590 // DATE(x)
10591 Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), vec![arg]))))
10592 }
10593 _ => {
10594 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
10595 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
10596 }
10597 }
10598 }
10599 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
10600 "TO_DATE" if f.args.len() == 1
10601 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
10602 let arg = f.args.into_iter().next().unwrap();
10603 match target {
10604 DialectType::DuckDB => {
10605 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
10606 Ok(Expression::TryCast(Box::new(Cast {
10607 this: arg,
10608 to: DataType::Date,
10609 double_colon_syntax: false,
10610 trailing_comments: vec![],
10611 format: None,
10612 default: None,
10613 })))
10614 }
10615 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10616 // CAST(CAST(x AS TIMESTAMP) AS DATE)
10617 Ok(Self::double_cast_timestamp_date(arg))
10618 }
10619 DialectType::Snowflake => {
10620 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
10621 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
10622 Ok(Expression::Function(Box::new(Function::new(
10623 "TRY_TO_DATE".to_string(),
10624 vec![arg, Expression::string("yyyy-mm-DD")],
10625 ))))
10626 }
10627 _ => {
10628 // Default: keep as TO_DATE(x)
10629 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
10630 }
10631 }
10632 }
10633 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
10634 "TO_DATE" if f.args.len() == 2
10635 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
10636 let mut args = f.args;
10637 let val = args.remove(0);
10638 let fmt_expr = args.remove(0);
10639 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
10640
10641 if is_default_format {
10642 // Default format: same as 1-arg form
10643 match target {
10644 DialectType::DuckDB => {
10645 Ok(Expression::TryCast(Box::new(Cast {
10646 this: val,
10647 to: DataType::Date,
10648 double_colon_syntax: false,
10649 trailing_comments: vec![],
10650 format: None,
10651 default: None,
10652 })))
10653 }
10654 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10655 Ok(Self::double_cast_timestamp_date(val))
10656 }
10657 DialectType::Snowflake => {
10658 // TRY_TO_DATE(x, format) with Snowflake format mapping
10659 let sf_fmt = "yyyy-MM-dd".replace("yyyy", "yyyy").replace("MM", "mm").replace("dd", "DD");
10660 Ok(Expression::Function(Box::new(Function::new(
10661 "TRY_TO_DATE".to_string(),
10662 vec![val, Expression::string(&sf_fmt)],
10663 ))))
10664 }
10665 _ => {
10666 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val]))))
10667 }
10668 }
10669 } else {
10670 // Non-default format: use format-based parsing
10671 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
10672 match target {
10673 DialectType::DuckDB => {
10674 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
// Convert a Java/Spark date format string (e.g. "dd/MM/yyyy") to a C
// strptime-style format for DuckDB TRY_STRPTIME. Same conversion as the
// TO_TIMESTAMP helper, kept in sync with it.
//
// Multi-letter tokens are rewritten first, ordered so that no earlier
// replacement corrupts a later one ("yyyy" before "yy"; "MM" before "mm",
// whose output "%m"/"%M" contains only a single letter and so cannot be
// re-matched). Timezone letters are handled afterwards in a character scan:
// Java `z` (zone name) -> `%Z`, Java `Z` (offset) -> `%z`. In Java, a run of
// the same letter ("zz", "zzz", "zzzz") is still ONE token, so a run is
// collapsed into a single specifier instead of one per character (the old
// per-character emission produced invalid formats like "%Z%Z%Z").
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(result.len());
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Already-converted "%X" specifier: copy verbatim so its letter
            // is not mistaken for a Java token.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' || chars[i] == 'Z' {
            let c = chars[i];
            out.push_str(if c == 'z' { "%Z" } else { "%z" });
            // Collapse the whole run: "zzz" is a single Java token.
            while i < chars.len() && chars[i] == c {
                i += 1;
            }
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
10707 let c_fmt = java_to_c_fmt_todate(s);
10708 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
10709 let try_strptime = Expression::Function(Box::new(Function::new(
10710 "TRY_STRPTIME".to_string(),
10711 vec![val, Expression::string(&c_fmt)],
10712 )));
10713 let cast_ts = Expression::Cast(Box::new(Cast {
10714 this: try_strptime,
10715 to: DataType::Timestamp { precision: None, timezone: false },
10716 double_colon_syntax: false,
10717 trailing_comments: vec![],
10718 format: None,
10719 default: None,
10720 }));
10721 Ok(Expression::Cast(Box::new(Cast {
10722 this: cast_ts,
10723 to: DataType::Date,
10724 double_colon_syntax: false,
10725 trailing_comments: vec![],
10726 format: None,
10727 default: None,
10728 })))
10729 }
10730 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10731 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
10732 let p_fmt = s
10733 .replace("yyyy", "%Y")
10734 .replace("SSSSSS", "%f")
10735 .replace("MM", "%m")
10736 .replace("dd", "%d")
10737 .replace("HH", "%H")
10738 .replace("mm", "%M")
10739 .replace("ss", "%S")
10740 .replace("yy", "%y");
10741 let date_parse = Expression::Function(Box::new(Function::new(
10742 "DATE_PARSE".to_string(),
10743 vec![val, Expression::string(&p_fmt)],
10744 )));
10745 Ok(Expression::Cast(Box::new(Cast {
10746 this: date_parse,
10747 to: DataType::Date,
10748 double_colon_syntax: false,
10749 trailing_comments: vec![],
10750 format: None,
10751 default: None,
10752 })))
10753 }
10754 DialectType::Snowflake => {
10755 // TRY_TO_DATE(x, snowflake_fmt)
10756 Ok(Expression::Function(Box::new(Function::new(
10757 "TRY_TO_DATE".to_string(),
10758 vec![val, Expression::string(s)],
10759 ))))
10760 }
10761 _ => {
10762 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10763 }
10764 }
10765 } else {
10766 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10767 }
10768 }
10769 }
10770 // TO_TIMESTAMP(x) 1-arg: epoch conversion
10771 "TO_TIMESTAMP" if f.args.len() == 1
10772 && matches!(source, DialectType::DuckDB)
10773 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10774 | DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::Athena) => {
10775 let arg = f.args.into_iter().next().unwrap();
10776 let func_name = match target {
10777 DialectType::BigQuery => "TIMESTAMP_SECONDS",
10778 DialectType::Presto | DialectType::Trino | DialectType::Athena
10779 | DialectType::Hive | DialectType::Spark | DialectType::Databricks => "FROM_UNIXTIME",
10780 _ => "TO_TIMESTAMP",
10781 };
10782 Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![arg]))))
10783 }
10784 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
10785 "CONCAT" if f.args.len() == 1
10786 && matches!(target, DialectType::Spark | DialectType::Databricks) => {
10787 let arg = f.args.into_iter().next().unwrap();
10788 let coalesced = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
10789 expressions: vec![arg, Expression::string("")],
10790 original_name: None,
10791 }));
10792 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), vec![coalesced]))))
10793 }
10794 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
10795 "REGEXP_EXTRACT" if f.args.len() == 3
10796 && matches!(target, DialectType::BigQuery) => {
10797 // If group_index is 0, drop it
10798 let drop_group = match &f.args[2] {
10799 Expression::Literal(Literal::Number(n)) => n == "0",
10800 _ => false,
10801 };
10802 if drop_group {
10803 let mut args = f.args;
10804 args.truncate(2);
10805 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
10806 } else {
10807 Ok(Expression::Function(f))
10808 }
10809 }
10810 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
10811 "REGEXP_EXTRACT" if f.args.len() == 4
10812 && matches!(target, DialectType::Snowflake) => {
10813 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
10814 let mut args = f.args;
10815 let this = args.remove(0);
10816 let pattern = args.remove(0);
10817 let group = args.remove(0);
10818 let flags = args.remove(0);
10819 Ok(Expression::Function(Box::new(Function::new(
10820 "REGEXP_SUBSTR".to_string(),
10821 vec![this, pattern, Expression::number(1), Expression::number(1), flags, group],
10822 ))))
10823 }
10824 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
10825 "REGEXP_SUBSTR" if f.args.len() == 3
10826 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Spark | DialectType::Databricks) => {
10827 let mut args = f.args;
10828 let this = args.remove(0);
10829 let pattern = args.remove(0);
10830 let position = args.remove(0);
10831 // Wrap subject in SUBSTRING(this, position) to apply the offset
10832 let substring_expr = Expression::Function(Box::new(Function::new(
10833 "SUBSTRING".to_string(),
10834 vec![this, position],
10835 )));
10836 let target_name = match target {
10837 DialectType::DuckDB => "REGEXP_EXTRACT",
10838 _ => "REGEXP_EXTRACT",
10839 };
10840 Ok(Expression::Function(Box::new(Function::new(
10841 target_name.to_string(),
10842 vec![substring_expr, pattern],
10843 ))))
10844 }
10845 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
10846 "TO_DAYS" if f.args.len() == 1 => {
10847 let x = f.args.into_iter().next().unwrap();
10848 let epoch = Expression::string("0000-01-01");
10849 // Build the final target-specific expression directly
10850 let datediff_expr = match target {
10851 DialectType::MySQL | DialectType::SingleStore => {
10852 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
10853 Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
10854 }
10855 DialectType::DuckDB => {
10856 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
10857 let cast_epoch = Expression::Cast(Box::new(Cast {
10858 this: epoch, to: DataType::Date,
10859 trailing_comments: Vec::new(), double_colon_syntax: false,
10860 format: None, default: None,
10861 }));
10862 let cast_x = Expression::Cast(Box::new(Cast {
10863 this: x, to: DataType::Date,
10864 trailing_comments: Vec::new(), double_colon_syntax: false,
10865 format: None, default: None,
10866 }));
10867 Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
10868 Expression::string("DAY"), cast_epoch, cast_x,
10869 ])))
10870 }
10871 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10872 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
10873 let cast_epoch = Self::double_cast_timestamp_date(epoch);
10874 let cast_x = Self::double_cast_timestamp_date(x);
10875 Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
10876 Expression::string("DAY"), cast_epoch, cast_x,
10877 ])))
10878 }
10879 _ => {
10880 // Default: (DATEDIFF(x, '0000-01-01') + 1)
10881 Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
10882 }
10883 };
10884 let add_one = Expression::Add(Box::new(BinaryOp::new(datediff_expr, Expression::number(1))));
10885 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
10886 this: add_one,
10887 trailing_comments: Vec::new(),
10888 })))
10889 }
10890 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
10891 "STR_TO_DATE" if f.args.len() == 2
10892 && matches!(target, DialectType::Presto | DialectType::Trino) => {
10893 let mut args = f.args;
10894 let x = args.remove(0);
10895 let format_expr = args.remove(0);
10896 // Check if the format contains time components
10897 let has_time = if let Expression::Literal(Literal::String(ref fmt)) = format_expr {
10898 fmt.contains("%H") || fmt.contains("%T") || fmt.contains("%M") || fmt.contains("%S")
10899 || fmt.contains("%I") || fmt.contains("%p")
10900 } else {
10901 false
10902 };
10903 let date_parse = Expression::Function(Box::new(Function::new(
10904 "DATE_PARSE".to_string(),
10905 vec![x, format_expr],
10906 )));
10907 if has_time {
10908 // Has time components: just DATE_PARSE
10909 Ok(date_parse)
10910 } else {
10911 // Date-only: CAST(DATE_PARSE(...) AS DATE)
10912 Ok(Expression::Cast(Box::new(Cast {
10913 this: date_parse,
10914 to: DataType::Date,
10915 trailing_comments: Vec::new(),
10916 double_colon_syntax: false,
10917 format: None,
10918 default: None,
10919 })))
10920 }
10921 }
10922 "STR_TO_DATE" if f.args.len() == 2
10923 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
10924 let mut args = f.args;
10925 let x = args.remove(0);
10926 let fmt = args.remove(0);
10927 let pg_fmt = match fmt {
10928 Expression::Literal(Literal::String(s)) => {
10929 Expression::string(
10930 &s.replace("%Y", "YYYY")
10931 .replace("%m", "MM")
10932 .replace("%d", "DD")
10933 .replace("%H", "HH24")
10934 .replace("%M", "MI")
10935 .replace("%S", "SS")
10936 )
10937 }
10938 other => other,
10939 };
10940 let to_date = Expression::Function(Box::new(Function::new(
10941 "TO_DATE".to_string(),
10942 vec![x, pg_fmt],
10943 )));
10944 Ok(Expression::Cast(Box::new(Cast {
10945 this: to_date,
10946 to: DataType::Timestamp { timezone: false, precision: None },
10947 trailing_comments: Vec::new(),
10948 double_colon_syntax: false,
10949 format: None,
10950 default: None,
10951 })))
10952 }
10953 // RANGE(start, end) -> GENERATE_SERIES for SQLite
10954 "RANGE" if (f.args.len() == 1 || f.args.len() == 2)
10955 && matches!(target, DialectType::SQLite) => {
10956 if f.args.len() == 2 {
10957 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
10958 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
10959 let mut args = f.args;
10960 let start = args.remove(0);
10961 let end = args.remove(0);
10962 Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), vec![start, end]))))
10963 } else {
10964 Ok(Expression::Function(f))
10965 }
10966 }
10967 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
10968 // When source is Snowflake, keep as-is (args already in correct form)
10969 "UNIFORM" if matches!(target, DialectType::Snowflake) && (f.args.len() == 2 || f.args.len() == 3) => {
10970 if matches!(source, DialectType::Snowflake) {
10971 // Snowflake -> Snowflake: keep as-is
10972 Ok(Expression::Function(f))
10973 } else {
10974 let mut args = f.args;
10975 let low = args.remove(0);
10976 let high = args.remove(0);
10977 let random = if !args.is_empty() {
10978 let seed = args.remove(0);
10979 Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![seed])))
10980 } else {
10981 Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![])))
10982 };
10983 Ok(Expression::Function(Box::new(Function::new("UNIFORM".to_string(), vec![low, high, random]))))
10984 }
10985 }
10986 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
10987 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
10988 let mut args = f.args;
10989 let ts_arg = args.remove(0);
10990 let tz_arg = args.remove(0);
10991 // Cast string literal to TIMESTAMP for all targets
10992 let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
10993 Expression::Cast(Box::new(Cast {
10994 this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
10995 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
10996 }))
10997 } else { ts_arg };
10998 match target {
10999 DialectType::Spark | DialectType::Databricks => {
11000 Ok(Expression::Function(Box::new(Function::new(
11001 "TO_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
11002 ))))
11003 }
11004 DialectType::Snowflake => {
11005 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
11006 Ok(Expression::Function(Box::new(Function::new(
11007 "CONVERT_TIMEZONE".to_string(), vec![tz_arg, Expression::string("UTC"), ts_cast],
11008 ))))
11009 }
11010 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11011 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
11012 let wtz = Expression::Function(Box::new(Function::new(
11013 "WITH_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
11014 )));
11015 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11016 this: wtz, zone: Expression::string("UTC"),
11017 })))
11018 }
11019 DialectType::BigQuery => {
11020 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
11021 let cast_dt = Expression::Cast(Box::new(Cast {
11022 this: if let Expression::Cast(c) = ts_cast { c.this } else { ts_cast.clone() },
11023 to: DataType::Custom { name: "DATETIME".to_string() },
11024 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11025 }));
11026 let ts_func = Expression::Function(Box::new(Function::new(
11027 "TIMESTAMP".to_string(), vec![cast_dt, tz_arg],
11028 )));
11029 Ok(Expression::Function(Box::new(Function::new(
11030 "DATETIME".to_string(), vec![ts_func, Expression::string("UTC")],
11031 ))))
11032 }
11033 _ => {
11034 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
11035 let atz1 = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11036 this: ts_cast, zone: tz_arg,
11037 }));
11038 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11039 this: atz1, zone: Expression::string("UTC"),
11040 })))
11041 }
11042 }
11043 }
11044 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
11045 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
11046 let mut args = f.args;
11047 let ts_arg = args.remove(0);
11048 let tz_arg = args.remove(0);
11049 // Cast string literal to TIMESTAMP
11050 let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
11051 Expression::Cast(Box::new(Cast {
11052 this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
11053 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11054 }))
11055 } else { ts_arg };
11056 match target {
11057 DialectType::Spark | DialectType::Databricks => {
11058 Ok(Expression::Function(Box::new(Function::new(
11059 "FROM_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
11060 ))))
11061 }
11062 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11063 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
11064 Ok(Expression::Function(Box::new(Function::new(
11065 "AT_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
11066 ))))
11067 }
11068 DialectType::Snowflake => {
11069 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
11070 Ok(Expression::Function(Box::new(Function::new(
11071 "CONVERT_TIMEZONE".to_string(), vec![Expression::string("UTC"), tz_arg, ts_cast],
11072 ))))
11073 }
11074 _ => {
11075 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
11076 Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
11077 this: ts_cast, zone: tz_arg,
11078 })))
11079 }
11080 }
11081 }
11082 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
11083 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
11084 let name = match target {
11085 DialectType::Snowflake => "OBJECT_CONSTRUCT",
11086 _ => "MAP",
11087 };
11088 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11089 }
11090 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
11091 "STR_TO_MAP" if f.args.len() >= 1 => {
11092 match target {
11093 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11094 Ok(Expression::Function(Box::new(Function::new("SPLIT_TO_MAP".to_string(), f.args))))
11095 }
11096 _ => Ok(Expression::Function(f)),
11097 }
11098 }
11099 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
11100 "TIME_TO_STR" if f.args.len() == 2 => {
11101 let mut args = f.args;
11102 let this = args.remove(0);
11103 let fmt_expr = args.remove(0);
11104 let format = if let Expression::Literal(Literal::String(s)) = fmt_expr {
11105 s
11106 } else {
11107 "%Y-%m-%d %H:%M:%S".to_string()
11108 };
11109 Ok(Expression::TimeToStr(Box::new(crate::expressions::TimeToStr {
11110 this: Box::new(this),
11111 format,
11112 culture: None,
11113 zone: None,
11114 })))
11115 }
11116 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
11117 "MONTHS_BETWEEN" if f.args.len() == 2 => {
11118 match target {
11119 DialectType::DuckDB => {
11120 let mut args = f.args;
11121 let end_date = args.remove(0);
11122 let start_date = args.remove(0);
11123 let cast_end = Self::ensure_cast_date(end_date);
11124 let cast_start = Self::ensure_cast_date(start_date);
11125 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
11126 let dd = Expression::Function(Box::new(Function::new(
11127 "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
11128 )));
11129 let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
11130 let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
11131 let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
11132 let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
11133 let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
11134 let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
11135 let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
11136 let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
11137 let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
11138 let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
11139 let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
11140 this: day_diff, trailing_comments: Vec::new(),
11141 }));
11142 let frac = Expression::Div(Box::new(BinaryOp::new(
11143 day_diff_paren,
11144 Expression::Literal(Literal::Number("31.0".to_string())),
11145 )));
11146 let case_expr = Expression::Case(Box::new(Case {
11147 operand: None,
11148 whens: vec![(both_cond, Expression::number(0))],
11149 else_: Some(frac),
11150 }));
11151 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
11152 }
11153 DialectType::Snowflake | DialectType::Redshift => {
11154 let mut args = f.args;
11155 let end_date = args.remove(0);
11156 let start_date = args.remove(0);
11157 let unit = Expression::Identifier(Identifier::new("MONTH"));
11158 Ok(Expression::Function(Box::new(Function::new(
11159 "DATEDIFF".to_string(), vec![unit, start_date, end_date],
11160 ))))
11161 }
11162 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11163 let mut args = f.args;
11164 let end_date = args.remove(0);
11165 let start_date = args.remove(0);
11166 Ok(Expression::Function(Box::new(Function::new(
11167 "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
11168 ))))
11169 }
11170 _ => Ok(Expression::Function(f)),
11171 }
11172 }
11173 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
11174 // Drop the roundOff arg for non-Spark targets, keep it for Spark
11175 "MONTHS_BETWEEN" if f.args.len() == 3 => {
11176 match target {
11177 DialectType::Spark | DialectType::Databricks => {
11178 Ok(Expression::Function(f))
11179 }
11180 _ => {
11181 // Drop the 3rd arg and delegate to the 2-arg logic
11182 let mut args = f.args;
11183 let end_date = args.remove(0);
11184 let start_date = args.remove(0);
11185 // Re-create as 2-arg and process
11186 let f2 = Function::new("MONTHS_BETWEEN".to_string(), vec![end_date, start_date]);
11187 let e2 = Expression::Function(Box::new(f2));
11188 Self::cross_dialect_normalize(e2, source, target)
11189 }
11190 }
11191 }
11192 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
11193 "TO_TIMESTAMP" if f.args.len() == 1
11194 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
11195 let arg = f.args.into_iter().next().unwrap();
11196 Ok(Expression::Cast(Box::new(Cast {
11197 this: arg, to: DataType::Timestamp { timezone: false, precision: None },
11198 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11199 })))
11200 }
11201 // STRING(x) -> CAST(x AS STRING) for Spark target
11202 "STRING" if f.args.len() == 1
11203 && matches!(source, DialectType::Spark | DialectType::Databricks) => {
11204 let arg = f.args.into_iter().next().unwrap();
11205 let dt = match target {
11206 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
11207 DataType::Custom { name: "STRING".to_string() }
11208 }
11209 _ => DataType::Text,
11210 };
11211 Ok(Expression::Cast(Box::new(Cast {
11212 this: arg, to: dt,
11213 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
11214 })))
11215 }
11216 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
11217 "LOGICAL_OR" if f.args.len() == 1 => {
11218 let name = match target {
11219 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
11220 _ => "LOGICAL_OR",
11221 };
11222 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11223 }
11224 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
11225 "SPLIT" if f.args.len() == 2
11226 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
11227 let name = match target {
11228 DialectType::DuckDB => "STR_SPLIT_REGEX",
11229 DialectType::Presto | DialectType::Trino | DialectType::Athena => "REGEXP_SPLIT",
11230 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
11231 _ => "SPLIT",
11232 };
11233 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11234 }
11235 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
11236 "TRY_ELEMENT_AT" if f.args.len() == 2 => {
11237 match target {
11238 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11239 Ok(Expression::Function(Box::new(Function::new("ELEMENT_AT".to_string(), f.args))))
11240 }
11241 DialectType::DuckDB => {
11242 let mut args = f.args;
11243 let arr = args.remove(0);
11244 let idx = args.remove(0);
11245 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
11246 this: arr,
11247 index: idx,
11248 })))
11249 }
11250 _ => Ok(Expression::Function(f)),
11251 }
11252 }
11253 _ => Ok(Expression::Function(f)),
11254 }
11255 } else if let Expression::AggregateFunction(mut af) = e {
11256 let name = af.name.to_uppercase();
11257 match name.as_str() {
11258 "ARBITRARY" if af.args.len() == 1 => {
11259 let arg = af.args.into_iter().next().unwrap();
11260 Ok(convert_arbitrary(arg, target))
11261 }
11262 "JSON_ARRAYAGG" => {
11263 match target {
11264 DialectType::PostgreSQL => {
11265 af.name = "JSON_AGG".to_string();
11266 // Add NULLS FIRST to ORDER BY items for PostgreSQL
11267 for ordered in af.order_by.iter_mut() {
11268 if ordered.nulls_first.is_none() {
11269 ordered.nulls_first = Some(true);
11270 }
11271 }
11272 Ok(Expression::AggregateFunction(af))
11273 }
11274 _ => Ok(Expression::AggregateFunction(af)),
11275 }
11276 }
11277 _ => Ok(Expression::AggregateFunction(af)),
11278 }
11279 } else if let Expression::JSONArrayAgg(ja) = e {
11280 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
11281 match target {
11282 DialectType::PostgreSQL => {
11283 let mut order_by = Vec::new();
11284 if let Some(order_expr) = ja.order {
11285 if let Expression::OrderBy(ob) = *order_expr {
11286 for mut ordered in ob.expressions {
11287 if ordered.nulls_first.is_none() {
11288 ordered.nulls_first = Some(true);
11289 }
11290 order_by.push(ordered);
11291 }
11292 }
11293 }
11294 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
11295 name: "JSON_AGG".to_string(),
11296 args: vec![*ja.this],
11297 distinct: false,
11298 filter: None,
11299 order_by,
11300 limit: None,
11301 ignore_nulls: None,
11302 })))
11303 }
11304 _ => Ok(Expression::JSONArrayAgg(ja)),
11305 }
11306 } else if let Expression::ToNumber(tn) = e {
11307 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
11308 let arg = *tn.this;
11309 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11310 this: arg,
11311 to: crate::expressions::DataType::Double { precision: None, scale: None },
11312 double_colon_syntax: false,
11313 trailing_comments: Vec::new(),
11314 format: None,
11315 default: None,
11316 })))
11317 } else {
11318 Ok(e)
11319 }
11320 }
11321
11322 Action::RegexpLikeToDuckDB => {
11323 if let Expression::RegexpLike(f) = e {
11324 let mut args = vec![f.this, f.pattern];
11325 if let Some(flags) = f.flags {
11326 args.push(flags);
11327 }
11328 Ok(Expression::Function(Box::new(Function::new(
11329 "REGEXP_MATCHES".to_string(),
11330 args,
11331 ))))
11332 } else {
11333 Ok(e)
11334 }
11335 }
11336 Action::EpochConvert => {
11337 if let Expression::Epoch(f) = e {
11338 let arg = f.this;
11339 let name = match target {
11340 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
11341 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
11342 DialectType::BigQuery => "TIME_TO_UNIX",
11343 _ => "EPOCH",
11344 };
11345 Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arg]))))
11346 } else {
11347 Ok(e)
11348 }
11349 }
Action::EpochMsConvert => {
    // EPOCH_MS(x) -- "milliseconds since the Unix epoch -> timestamp" --
    // rewritten per target dialect.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            // Spark/Databricks and BigQuery share the TIMESTAMP_MILLIS builtin.
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::BigQuery => {
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                // The cast keeps the division fractional (seconds.millis).
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POW".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3)) -- no explicit cast here,
                // unlike the Presto/Trino and PostgreSQL arms.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![div]))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "Nullable(Int64)".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("fromUnixTimestamp64Milli".to_string(), vec![cast_arg]))))
            }
            // Default (e.g. DuckDB): keep the EPOCH_MS call as-is.
            _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![arg])))),
        }
    } else {
        Ok(e)
    }
}
Action::TSQLTypeNormalize => {
    // Normalize TSQL-specific type names (parsed into DataType::Custom or
    // TSQL-flavored variants) into the closest portable DataType for the
    // target dialect. Types with no mapping are returned unchanged.
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY / SMALLMONEY are fixed-point currency types.
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal { precision: Some(15), scale: Some(4) }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("SMALLMONEY") => {
                DataType::Decimal { precision: Some(6), scale: Some(4) }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp { timezone: false, precision: None }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            // FLOAT parsed with the REAL spelling: drop the spelling flag.
            DataType::Float { real_spelling: true, .. } => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom { name: "BLOB".to_string() }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("ROWVERSION") => {
                DataType::Custom { name: "BINARY".to_string() }
            }
            // UNIQUEIDENTIFIER (GUID): STRING on Hive-family targets,
            // otherwise VARCHAR(36) -- the length of a textual GUID.
            DataType::Custom { name } if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Custom { name: "STRING".to_string() }
                    }
                    _ => DataType::VarChar { length: Some(36), parenthesized_length: true },
                }
            }
            // DATETIMEOFFSET: timezone-aware TIMESTAMP, except on
            // Hive-family targets where it degrades to plain TIMESTAMP.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIMEOFFSET") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => DataType::Timestamp { timezone: true, precision: None },
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("DATETIME2(") => {
                // DATETIME2(n) -> TIMESTAMP (precision argument dropped)
                DataType::Timestamp { timezone: false, precision: None }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("TIME(") => {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("NUMERIC") => {
                // Parse NUMERIC(p,s) back to Decimal(p,s)
                let upper = name.to_uppercase();
                if let Some(inner) = upper.strip_prefix("NUMERIC(").and_then(|s| s.strip_suffix(')')) {
                    let parts: Vec<&str> = inner.split(',').collect();
                    // Unparseable precision/scale tokens degrade to None.
                    let precision = parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale = parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal { precision: None, scale: None }
                } else {
                    // Any other NUMERIC... spelling: leave untouched.
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float { precision: Some(p), .. } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float { precision: None, scale: None, real_spelling: false }
                } else {
                    DataType::Double { precision: None, scale: None }
                }
            }
            DataType::TinyInt { .. } => {
                // DuckDB gets UTINYINT; Hive-family widens to SMALLINT;
                // all other targets keep TINYINT unchanged.
                match target {
                    DialectType::DuckDB => DataType::Custom { name: "UTINYINT".to_string() },
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => DataType::SmallInt { length: None },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            // INTEGER spelling -> INT. Note: this applies to EVERY target
            // (the previous comment claimed Spark/Databricks only, but the
            // arm is unconditional).
            DataType::Int { length, integer_spelling: true } => {
                DataType::Int { length: *length, integer_spelling: false }
            }
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
Action::MySQLSafeDivide => {
    // Reproduce MySQL's lenient division on stricter targets: wrap the
    // divisor in NULLIF(divisor, 0) so x / 0 yields NULL instead of an
    // error, and cast the dividend to a floating type on integer-division
    // dialects so the fractional part is preserved.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        // (division by zero already yields NULL there).
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float { precision: None, scale: None, real_spelling: true },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0)
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side to force float division.
        let new_left = match target {
            DialectType::PostgreSQL | DialectType::Redshift | DialectType::Teradata => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::Drill | DialectType::Trino | DialectType::Presto => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::TSQL => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Float { precision: None, scale: None, real_spelling: false },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            // Other targets keep the dividend untouched.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(new_left, nullif_right))))
    } else {
        Ok(e)
    }
}
11581 Action::AlterTableRenameStripSchema => {
11582 if let Expression::AlterTable(mut at) = e {
11583 if let Some(crate::expressions::AlterTableAction::RenameTable(ref mut new_tbl)) = at.actions.first_mut() {
11584 new_tbl.schema = None;
11585 new_tbl.catalog = None;
11586 }
11587 Ok(Expression::AlterTable(at))
11588 } else {
11589 Ok(e)
11590 }
11591 }
Action::NullsOrdering => {
    // Fill in the source dialect's implied null ordering default.
    // This makes implicit null ordering explicit so the target generator
    // can correctly strip or keep it.
    //
    // Dialect null ordering categories (per the matches! lists below):
    // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
    // ASC -> NULLS LAST, DESC -> NULLS FIRST
    // nulls_are_small (anything not matched below, e.g. Spark, Hive,
    // BigQuery, MySQL, Databricks):
    // ASC -> NULLS FIRST, DESC -> NULLS LAST
    // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena, ClickHouse,
    // Drill, Exasol, DataFusion):
    // NULLS LAST always (both ASC and DESC)
    if let Expression::Ordered(mut o) = e {
        // o.desc is false for (implicit or explicit) ASC.
        let is_asc = !o.desc;

        let is_source_nulls_large = matches!(source,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_source_nulls_last = matches!(source,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol | DialectType::DataFusion
        );

        // Determine target category to check if default matches
        let is_target_nulls_large = matches!(target,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_target_nulls_last = matches!(target,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol | DialectType::DataFusion
        );

        // Compute the implied nulls_first for source
        let source_nulls_first = if is_source_nulls_large {
            !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
        } else if is_source_nulls_last {
            false // NULLS LAST always
        } else {
            is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
        };

        // Compute the target's default
        let target_nulls_first = if is_target_nulls_large {
            !is_asc
        } else if is_target_nulls_last {
            false
        } else {
            is_asc
        };

        // Only add explicit nulls ordering if source and target defaults differ
        if source_nulls_first != target_nulls_first {
            o.nulls_first = Some(source_nulls_first);
        }
        // If they match, leave nulls_first as None so the generator won't output it

        Ok(Expression::Ordered(o))
    } else {
        Ok(e)
    }
}
Action::StringAggConvert => {
    // Rewrites STRING_AGG (bare, or wrapped in WITHIN GROUP) into the
    // target dialect's string-aggregation idiom.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af) if af.name.eq_ignore_ascii_case("STRING_AGG") && af.args.len() >= 2 => {
                    (Some(af.args[0].clone()), Some(af.args[1].clone()), af.distinct)
                }
                // Plain Function nodes carry no distinct flag.
                Expression::Function(ref f) if f.name.eq_ignore_ascii_case("STRING_AGG") && f.args.len() >= 2 => {
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                            this: Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None, // order_by goes in WithinGroup, not StringAgg
                                distinct,
                                filter: None,
                                limit: None,
                            })),
                            order_by,
                        })))
                    }
                    DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support,
                        // so the WITHIN GROUP ordering is dropped here.
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: None,
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                }
            } else {
                // Not a STRING_AGG (or too few args): leave untouched.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            match target {
                DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                        this: sa.this,
                        separator: sa.separator,
                        on_overflow: None,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: None,
                    })))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
Action::GroupConcatConvert => {
    // Rewrites GROUP_CONCAT into the target dialect's aggregation idiom.
    // The local helpers below expand CONCAT(a, b, c) -> a || b || c
    // (double-pipe dialects such as PostgreSQL/SQLite), CONCAT(a, b, c)
    // -> a + b + c (TSQL), and cast CONCAT args to VARCHAR (Presto/Trino).
11781 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
11782 if let Expression::Function(ref f) = expr {
11783 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11784 let mut result = f.args[0].clone();
11785 for arg in &f.args[1..] {
11786 result = Expression::Concat(Box::new(BinaryOp {
11787 left: result,
11788 right: arg.clone(),
11789 left_comments: vec![],
11790 operator_comments: vec![],
11791 trailing_comments: vec![],
11792 }));
11793 }
11794 return result;
11795 }
11796 }
11797 expr
11798 }
11799 fn expand_concat_to_plus(expr: Expression) -> Expression {
11800 if let Expression::Function(ref f) = expr {
11801 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11802 let mut result = f.args[0].clone();
11803 for arg in &f.args[1..] {
11804 result = Expression::Add(Box::new(BinaryOp {
11805 left: result,
11806 right: arg.clone(),
11807 left_comments: vec![],
11808 operator_comments: vec![],
11809 trailing_comments: vec![],
11810 }));
11811 }
11812 return result;
11813 }
11814 }
11815 expr
11816 }
11817 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
11818 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
11819 if let Expression::Function(ref f) = expr {
11820 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11821 let new_args: Vec<Expression> = f.args.iter().map(|arg| {
11822 Expression::Cast(Box::new(crate::expressions::Cast {
11823 this: arg.clone(),
11824 to: crate::expressions::DataType::VarChar { length: None, parenthesized_length: false },
11825 trailing_comments: Vec::new(),
11826 double_colon_syntax: false,
11827 format: None,
11828 default: None,
11829 }))
11830 }).collect();
11831 return Expression::Function(Box::new(crate::expressions::Function::new(
11832 "CONCAT".to_string(),
11833 new_args,
11834 )));
11835 }
11836 }
11837 expr
11838 }
if let Expression::GroupConcat(gc) = e {
    match target {
        DialectType::Presto => {
            // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
            // Separator defaults to "," when none was given.
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                this,
                distinct: gc.distinct,
                filter: gc.filter,
                order_by: gc.order_by.unwrap_or_default(),
                name: None,
                ignore_nulls: None,
                having_max: None,
                limit: None,
            }));
            Ok(Expression::ArrayJoin(Box::new(crate::expressions::ArrayJoinFunc {
                this: array_agg,
                separator: sep,
                null_replacement: None,
            })))
        }
        DialectType::Trino => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                this,
                separator: Some(sep),
                on_overflow: None,
                order_by: gc.order_by,
                distinct: gc.distinct,
                filter: gc.filter,
            })))
        }
        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake
        | DialectType::DuckDB
        | DialectType::Hive | DialectType::ClickHouse => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a || b || c for || dialects
            let this = expand_concat_to_dpipe(gc.this);
            // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
            // (makes the ordering's implicit null placement explicit).
            let order_by = if target == DialectType::PostgreSQL {
                gc.order_by.map(|ords| {
                    ords.into_iter().map(|mut o| {
                        if o.nulls_first.is_none() {
                            if o.desc {
                                o.nulls_first = Some(false); // NULLS LAST
                            } else {
                                o.nulls_first = Some(true); // NULLS FIRST
                            }
                        }
                        o
                    }).collect()
                })
            } else {
                gc.order_by
            };
            Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                this,
                separator: Some(sep),
                order_by,
                distinct: gc.distinct,
                filter: gc.filter,
                limit: None,
            })))
        }
        DialectType::TSQL => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
            // TSQL doesn't support DISTINCT in STRING_AGG
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a + b + c for TSQL
            let this = expand_concat_to_plus(gc.this);
            Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                this,
                separator: Some(sep),
                order_by: gc.order_by,
                distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                filter: gc.filter,
                limit: None,
            })))
        }
        DialectType::SQLite => {
            // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
            // SQLite GROUP_CONCAT doesn't support ORDER BY
            // Expand CONCAT(a,b,c) -> a || b || c
            let this = expand_concat_to_dpipe(gc.this);
            Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                this,
                separator: gc.separator,
                order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                distinct: gc.distinct,
                filter: gc.filter,
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                this: gc.this,
                separator: Some(sep),
                on_overflow: None,
                order_by: gc.order_by,
                distinct: gc.distinct,
                filter: None,
            })))
        }
        DialectType::MySQL | DialectType::SingleStore | DialectType::StarRocks => {
            // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
            // NOTE(review): StringAggConvert routes Doris through
            // GROUP_CONCAT as well, but Doris is absent from this arm --
            // confirm whether that asymmetry is intentional.
            if gc.separator.is_none() {
                let mut gc = gc;
                gc.separator = Some(Expression::string(","));
                Ok(Expression::GroupConcat(gc))
            } else {
                Ok(Expression::GroupConcat(gc))
            }
        }
        // Any other target keeps GROUP_CONCAT untouched.
        _ => Ok(Expression::GroupConcat(gc)),
    }
} else {
    Ok(e)
}
}
11965 Action::TempTableHash => {
11966 match e {
11967 Expression::CreateTable(mut ct) => {
11968 // TSQL #table -> TEMPORARY TABLE with # stripped from name
11969 let name = &ct.name.name.name;
11970 if name.starts_with('#') {
11971 ct.name.name.name = name.trim_start_matches('#').to_string();
11972 }
11973 // Set temporary flag
11974 ct.temporary = true;
11975 Ok(Expression::CreateTable(ct))
11976 }
11977 Expression::Table(mut tr) => {
11978 // Strip # from table references
11979 let name = &tr.name.name;
11980 if name.starts_with('#') {
11981 tr.name.name = name.trim_start_matches('#').to_string();
11982 }
11983 Ok(Expression::Table(tr))
11984 }
11985 Expression::DropTable(mut dt) => {
11986 // Strip # from DROP TABLE names
11987 for table_ref in &mut dt.names {
11988 if table_ref.name.name.starts_with('#') {
11989 table_ref.name.name = table_ref.name.name.trim_start_matches('#').to_string();
11990 }
11991 }
11992 Ok(Expression::DropTable(dt))
11993 }
11994 _ => Ok(e),
11995 }
11996 }
11997 Action::NvlClearOriginal => {
11998 if let Expression::Nvl(mut f) = e {
11999 f.original_name = None;
12000 Ok(Expression::Nvl(f))
12001 } else {
12002 Ok(e)
12003 }
12004 }
Action::HiveCastToTryCast => {
    // Convert Hive/Spark CAST to TRY_CAST for targets that support it,
    // adjusting the destination type for a couple of source/target pairs.
    if let Expression::Cast(mut c) = e {
        // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
        // (Spark's TIMESTAMP is always timezone-aware)
        if matches!(target, DialectType::DuckDB)
            && matches!(source, DialectType::Spark | DialectType::Databricks)
            && matches!(c.to, DataType::Timestamp { timezone: false, .. })
        {
            c.to = DataType::Custom { name: "TIMESTAMPTZ".to_string() };
        }
        // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
        // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
        if matches!(target, DialectType::Databricks | DialectType::Spark)
            && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
            && Self::has_varchar_char_type(&c.to)
        {
            c.to = Self::normalize_varchar_to_string(c.to);
        }
        // Re-wrap the (possibly retyped) cast payload as TRY_CAST.
        Ok(Expression::TryCast(c))
    } else {
        Ok(e)
    }
}
Action::XorExpand => {
    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
    // Snowflake: use BOOLXOR(a, b) instead
    if let Expression::Xor(xor) = e {
        // Collect all XOR operands: the node carries two optional named
        // slots plus a trailing list for chained a XOR b XOR c ...
        let mut operands = Vec::new();
        if let Some(this) = xor.this {
            operands.push(*this);
        }
        if let Some(expr) = xor.expression {
            operands.push(*expr);
        }
        operands.extend(xor.expressions);

        // Snowflake: use BOOLXOR(a, b). Only the exact two-operand case;
        // longer chains fall through to the AND/OR expansion below.
        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
            let a = operands.remove(0);
            let b = operands.remove(0);
            return Ok(Expression::Function(Box::new(Function::new("BOOLXOR".to_string(), vec![a, b]))));
        }

        // Helper to build (a AND NOT b) OR (NOT a AND b); each sub-term is
        // parenthesized so generated precedence is unambiguous.
        let make_xor = |a: Expression, b: Expression| -> Expression {
            let not_b = Expression::Not(Box::new(crate::expressions::UnaryOp::new(b.clone())));
            let not_a = Expression::Not(Box::new(crate::expressions::UnaryOp::new(a.clone())));
            let left_and = Expression::And(Box::new(BinaryOp {
                left: a,
                right: Expression::Paren(Box::new(Paren { this: not_b, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            let right_and = Expression::And(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: not_a, trailing_comments: Vec::new() })),
                right: b,
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            Expression::Or(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: left_and, trailing_comments: Vec::new() })),
                right: Expression::Paren(Box::new(Paren { this: right_and, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }))
        };

        if operands.len() >= 2 {
            // Left-associative expansion: ((a XOR b) XOR c) ...
            let mut result = make_xor(operands.remove(0), operands.remove(0));
            for operand in operands {
                result = make_xor(result, operand);
            }
            Ok(result)
        } else if operands.len() == 1 {
            // Degenerate single-operand XOR: the operand itself.
            Ok(operands.remove(0))
        } else {
            // No operands - return FALSE (shouldn't happen)
            Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: false }))
        }
    } else {
        Ok(e)
    }
}
12087 Action::DatePartUnquote => {
12088 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
12089 // Convert the quoted string first arg to a bare Column/Identifier
12090 if let Expression::Function(mut f) = e {
12091 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) = f.args.first() {
12092 let bare_name = s.to_lowercase();
12093 f.args[0] = Expression::Column(crate::expressions::Column {
12094 name: Identifier::new(bare_name),
12095 table: None,
12096 join_mark: false,
12097 trailing_comments: Vec::new(),
12098 });
12099 }
12100 Ok(Expression::Function(f))
12101 } else {
12102 Ok(e)
12103 }
12104 }
Action::ArrayLengthConvert => {
    // Normalize the array-length family (CARDINALITY / ARRAY_LENGTH /
    // ARRAY_SIZE) to the target dialect's spelling.
    // Extract the argument from the expression
    let arg = match e {
        Expression::Cardinality(ref f) => f.this.clone(),
        Expression::ArrayLength(ref f) => f.this.clone(),
        Expression::ArraySize(ref f) => f.this.clone(),
        _ => return Ok(e),
    };
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new("SIZE".to_string(), vec![arg]))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Cardinality(Box::new(crate::expressions::UnaryFunc::new(arg))))
        }
        // BigQuery and DuckDB both produce an ArrayLength node (kept as
        // separate arms, presumably in case their handling diverges).
        DialectType::BigQuery => {
            Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
        }
        DialectType::DuckDB => {
            Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // PostgreSQL ARRAY_LENGTH requires dimension arg
            Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), vec![arg, Expression::number(1)]))))
        }
        _ => Ok(e), // Keep original
    }
}
12133
12134 Action::JsonExtractToArrow => {
12135 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
12136 if let Expression::JsonExtract(mut f) = e {
12137 f.arrow_syntax = true;
12138 Ok(Expression::JsonExtract(f))
12139 } else {
12140 Ok(e)
12141 }
12142 }
12143
Action::JsonExtractToGetJsonObject => {
    if let Expression::JsonExtract(f) = e {
        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
            // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
            // Convert JSONPath to individual keys. Only simple dotted paths
            // ("$.a.b") are decomposed; any other path expression (e.g.
            // array subscripts, non-literal paths) is passed through as a
            // single argument.
            let extracted_keys: Option<Vec<String>> = if let Expression::Literal(Literal::String(ref s)) = f.path {
                s.strip_prefix("$.").map(|stripped| {
                    stripped.split('.').map(|k| k.to_string()).collect()
                })
            } else {
                None
            };
            let keys = if let Some(key_list) = extracted_keys {
                key_list.into_iter().map(|k| Expression::string(&k)).collect::<Vec<_>>()
            } else {
                vec![f.path]
            };
            let mut args = vec![f.this];
            args.extend(keys);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH".to_string(),
                args,
            ))))
        } else {
            // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
    } else {
        Ok(e)
    }
}
12178
12179 Action::JsonExtractScalarToGetJsonObject => {
12180 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
12181 if let Expression::JsonExtractScalar(f) = e {
12182 Ok(Expression::Function(Box::new(Function::new(
12183 "GET_JSON_OBJECT".to_string(),
12184 vec![f.this, f.path],
12185 ))))
12186 } else {
12187 Ok(e)
12188 }
12189 }
12190
Action::JsonQueryValueConvert => {
    // JsonQuery/JsonValue -> target-specific. Both node kinds carry the
    // same (this, path) payload; is_query only matters for the default arm.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...)): try JSON_QUERY
            // first and fall back to JSON_VALUE when it yields NULL.
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: f.this,
                path: f.path,
                returning: f.returning,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: f.wrapper_option,
                quotes_option: f.quotes_option,
                on_scalar_string: f.on_scalar_string,
                on_error: f.on_error,
            })))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") => {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => {
                    Expression::Function(Box::new(Function::new(
                        "PARSE_JSON".to_string(), vec![f.this],
                    )))
                }
            };
            // Non-string-literal paths are forwarded unchanged.
            let path_str = match &f.path {
                Expression::Literal(Literal::String(s)) => {
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Literal::String(stripped.to_string()))
                }
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(), vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
12279
12280 Action::JsonLiteralToJsonParse => {
12281 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
12282 if let Expression::Cast(c) = e {
12283 let func_name = if matches!(target, DialectType::Snowflake) {
12284 "PARSE_JSON"
12285 } else {
12286 "JSON_PARSE"
12287 };
12288 Ok(Expression::Function(Box::new(Function::new(
12289 func_name.to_string(),
12290 vec![c.this],
12291 ))))
12292 } else {
12293 Ok(e)
12294 }
12295 }
12296
Action::AtTimeZoneConvert => {
    // AT TIME ZONE -> target-specific conversion
    if let Expression::AtTimeZone(atz) = e {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // AT_TIMEZONE(expr, zone)
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                // FROM_UTC_TIMESTAMP(expr, zone)
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UTC_TIMESTAMP".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Snowflake => {
                // CONVERT_TIMEZONE('zone', expr) -- note the reversed argument order
                Ok(Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![atz.zone, atz.this],
                ))))
            }
            DialectType::BigQuery => {
                // TIMESTAMP(DATETIME(expr, 'zone'))
                let datetime_call = Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![atz.this, atz.zone],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![datetime_call],
                ))))
            }
            _ => {
                // Fallback: the Presto-style AT_TIMEZONE spelling.
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
        }
    } else {
        Ok(e)
    }
}

Action::DayOfWeekConvert => {
    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
    if let Expression::DayOfWeek(f) = e {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ISODOW".to_string(),
                    vec![f.this],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                // ((DAYOFWEEK(x) % 7) + 1): remaps the weekday numbering --
                // assumes Spark's DAYOFWEEK starts at 1=Sunday; TODO confirm
                // against the source dialect's DAY_OF_WEEK semantics.
                let dayofweek = Expression::Function(Box::new(Function::new(
                    "DAYOFWEEK".to_string(),
                    vec![f.this],
                )));
                let modulo = Expression::Mod(Box::new(BinaryOp {
                    left: dayofweek,
                    right: Expression::number(7),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                // Explicit parens keep the intended precedence in generated SQL.
                let paren_mod = Expression::Paren(Box::new(Paren {
                    this: modulo,
                    trailing_comments: Vec::new(),
                }));
                let add_one = Expression::Add(Box::new(BinaryOp {
                    left: paren_mod,
                    right: Expression::number(1),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                Ok(Expression::Paren(Box::new(Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            _ => Ok(Expression::DayOfWeek(f)),
        }
    } else {
        Ok(e)
    }
}
12388
Action::MaxByMinByConvert => {
    // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
    // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
    // Handle both Expression::Function and Expression::AggregateFunction
    // NOTE(review): any function name other than MAX_BY falls into the MIN_BY
    // path (`is_max` is a plain boolean) -- assumes the dispatcher only routes
    // MAX_BY/MIN_BY calls to this action; verify.
    let (is_max, args) = match &e {
        Expression::Function(f) => (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone()),
        Expression::AggregateFunction(af) => (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone()),
        _ => return Ok(e),
    };
    match target {
        DialectType::ClickHouse => {
            // argMax/argMin: any argument beyond the first two is truncated.
            let name = if is_max { "argMax" } else { "argMin" };
            let mut args = args;
            args.truncate(2);
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::DuckDB => {
            // DuckDB rename only; the full argument list is preserved.
            let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark keeps the names but drops any 3rd argument.
            let mut args = args;
            args.truncate(2);
            let name = if is_max { "MAX_BY" } else { "MIN_BY" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        _ => Ok(e),
    }
}
12418
12419 Action::ElementAtConvert => {
12420 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
12421 let (arr, idx) = if let Expression::ElementAt(bf) = e {
12422 (bf.this, bf.expression)
12423 } else if let Expression::Function(ref f) = e {
12424 if f.args.len() >= 2 {
12425 if let Expression::Function(f) = e {
12426 let mut args = f.args;
12427 let arr = args.remove(0);
12428 let idx = args.remove(0);
12429 (arr, idx)
12430 } else {
12431 unreachable!("outer condition already matched Expression::Function")
12432 }
12433 } else {
12434 return Ok(e);
12435 }
12436 } else {
12437 return Ok(e);
12438 };
12439 match target {
12440 DialectType::PostgreSQL => {
12441 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
12442 let arr_expr = Expression::Paren(Box::new(Paren {
12443 this: arr,
12444 trailing_comments: vec![],
12445 }));
12446 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12447 this: arr_expr,
12448 index: idx,
12449 })))
12450 }
12451 DialectType::BigQuery => {
12452 // BigQuery: convert ARRAY[...] to bare [...] for subscript
12453 let arr_expr = match arr {
12454 Expression::ArrayFunc(af) => {
12455 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12456 expressions: af.expressions,
12457 bracket_notation: true,
12458 use_list_keyword: false,
12459 }))
12460 }
12461 other => other,
12462 };
12463 let safe_ordinal = Expression::Function(Box::new(Function::new(
12464 "SAFE_ORDINAL".to_string(),
12465 vec![idx],
12466 )));
12467 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12468 this: arr_expr,
12469 index: safe_ordinal,
12470 })))
12471 }
12472 _ => Ok(Expression::Function(Box::new(Function::new(
12473 "ELEMENT_AT".to_string(),
12474 vec![arr, idx],
12475 ))))
12476 }
12477 }
12478
12479 Action::CurrentUserParens => {
12480 // CURRENT_USER -> CURRENT_USER() for Snowflake
12481 Ok(Expression::Function(Box::new(Function::new(
12482 "CURRENT_USER".to_string(),
12483 vec![],
12484 ))))
12485 }
12486
Action::ArrayAggToCollectList => {
    // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
    // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
    // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
    match e {
        Expression::AggregateFunction(mut af) => {
            // "Simple" = no DISTINCT, no IGNORE NULLS, no LIMIT.
            let is_simple = !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
            // COLLECT_LIST takes a single argument; extras are dropped.
            let args = if af.args.is_empty() { vec![] } else { vec![af.args[0].clone()] };
            af.name = "COLLECT_LIST".to_string();
            af.args = args;
            if is_simple {
                af.order_by = Vec::new();
            }
            Ok(Expression::AggregateFunction(af))
        }
        Expression::ArrayAgg(agg) => {
            // Dedicated ArrayAgg node: rebuild as a generic AggregateFunction
            // carrying the same modifiers.
            let is_simple = !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
            Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                name: "COLLECT_LIST".to_string(),
                args: vec![agg.this.clone()],
                distinct: agg.distinct,
                filter: agg.filter.clone(),
                order_by: if is_simple { Vec::new() } else { agg.order_by.clone() },
                limit: agg.limit.clone(),
                ignore_nulls: agg.ignore_nulls,
            })))
        }
        _ => Ok(e),
    }
}

Action::ArraySyntaxConvert => {
    match e {
        // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
        // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
        Expression::ArrayFunc(arr) if !arr.bracket_notation => {
            Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                expressions: arr.expressions,
                bracket_notation: true,
                use_list_keyword: false,
            })))
        }
        // ARRAY(y) function style -> ArrayFunc for target dialect
        // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
        Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
            let bracket = matches!(target, DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks);
            Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                expressions: f.args,
                bracket_notation: bracket,
                use_list_keyword: false,
            })))
        }
        _ => Ok(e),
    }
}
12542
12543 Action::CastToJsonForSpark => {
12544 // CAST(x AS JSON) -> TO_JSON(x) for Spark
12545 if let Expression::Cast(c) = e {
12546 Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![c.this]))))
12547 } else {
12548 Ok(e)
12549 }
12550 }
12551
Action::CastJsonToFromJson => {
    // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
    if let Expression::Cast(c) = e {
        // Extract the string literal from ParseJson; a non-ParseJson operand
        // is forwarded to FROM_JSON unchanged.
        let literal_expr = if let Expression::ParseJson(pj) = c.this {
            pj.this
        } else {
            c.this
        };
        // Convert the target DataType to Spark's type string format
        let type_str = Self::data_type_to_spark_string(&c.to);
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_JSON".to_string(),
            vec![literal_expr, Expression::Literal(Literal::String(type_str))],
        ))))
    } else {
        Ok(e)
    }
}

Action::ToJsonConvert => {
    // TO_JSON(x) -> target-specific conversion
    if let Expression::ToJson(f) = e {
        let arg = f.this;
        match target {
            DialectType::Presto | DialectType::Trino => {
                // JSON_FORMAT(CAST(x AS JSON))
                let cast_json = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "JSON".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
            }
            DialectType::BigQuery => {
                Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), vec![arg]))))
            }
            DialectType::DuckDB => {
                // CAST(TO_JSON(x) AS TEXT)
                let to_json = Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_json,
                    to: DataType::Text,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // Other targets: rebuild the ToJson node as-is (original_name is
            // intentionally reset to None).
            _ => Ok(Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None })))
        }
    } else {
        Ok(e)
    }
}
12610
12611 Action::VarianceToClickHouse => {
12612 if let Expression::Variance(f) = e {
12613 Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), vec![f.this]))))
12614 } else { Ok(e) }
12615 }
12616
12617 Action::StddevToClickHouse => {
12618 if let Expression::Stddev(f) = e {
12619 Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), vec![f.this]))))
12620 } else { Ok(e) }
12621 }
12622
12623 Action::ApproxQuantileConvert => {
12624 if let Expression::ApproxQuantile(aq) = e {
12625 let mut args = vec![*aq.this];
12626 if let Some(q) = aq.quantile { args.push(*q); }
12627 Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), args))))
12628 } else { Ok(e) }
12629 }
12630
12631 Action::DollarParamConvert => {
12632 if let Expression::Parameter(p) = e {
12633 Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
12634 name: p.name,
12635 index: p.index,
12636 style: crate::expressions::ParameterStyle::At,
12637 quoted: p.quoted,
12638 expression: p.expression,
12639 })))
12640 } else { Ok(e) }
12641 }
12642
Action::EscapeStringNormalize => {
    // Normalize an escape-string literal (e'...') for the target dialect.
    if let Expression::Literal(Literal::EscapeString(s)) = e {
        // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
        let stripped = if s.starts_with("e:") || s.starts_with("E:") {
            s[2..].to_string()
        } else {
            s
        };
        // Re-escape raw control characters so they render as \n, \r, \t.
        let normalized = stripped.replace('\n', "\\n").replace('\r', "\\r").replace('\t', "\\t");
        match target {
            DialectType::BigQuery => {
                // BigQuery: e'...' -> CAST(b'...' AS STRING)
                // Use Raw for the b'...' part to avoid double-escaping
                let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
                Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
            }
            _ => {
                Ok(Expression::Literal(Literal::EscapeString(normalized)))
            }
        }
    } else { Ok(e) }
}
12665
12666 Action::StraightJoinCase => {
12667 // straight_join: keep lowercase for DuckDB, quote for MySQL
12668 if let Expression::Column(col) = e {
12669 if col.name.name == "STRAIGHT_JOIN" {
12670 let mut new_col = col;
12671 new_col.name.name = "straight_join".to_string();
12672 if matches!(target, DialectType::MySQL) {
12673 // MySQL: needs quoting since it's a reserved keyword
12674 new_col.name.quoted = true;
12675 }
12676 Ok(Expression::Column(new_col))
12677 } else {
12678 Ok(Expression::Column(col))
12679 }
12680 } else { Ok(e) }
12681 }
12682
12683 Action::TablesampleReservoir => {
12684 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
12685 if let Expression::TableSample(mut ts) = e {
12686 if let Some(ref mut sample) = ts.sample {
12687 sample.method = crate::expressions::SampleMethod::Reservoir;
12688 sample.explicit_method = true;
12689 }
12690 Ok(Expression::TableSample(ts))
12691 } else { Ok(e) }
12692 }
12693
Action::TablesampleSnowflakeStrip => {
    // Strip method and PERCENT for Snowflake target from non-Snowflake source
    match e {
        // The same three flags are cleared on both shapes that can carry a
        // sample spec: a standalone TableSample node or a Table node.
        Expression::TableSample(mut ts) => {
            if let Some(ref mut sample) = ts.sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::TableSample(ts))
        }
        Expression::Table(mut t) => {
            if let Some(ref mut sample) = t.table_sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::Table(t))
        }
        _ => Ok(e),
    }
}
12716
12717 Action::FirstToAnyValue => {
12718 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
12719 if let Expression::First(mut agg) = e {
12720 agg.ignore_nulls = None;
12721 agg.name = Some("ANY_VALUE".to_string());
12722 Ok(Expression::AnyValue(agg))
12723 } else { Ok(e) }
12724 }
12725
Action::ArrayIndexConvert => {
    // Subscript index: 1-based to 0-based for BigQuery
    if let Expression::Subscript(mut sub) = e {
        // Only literal integer indexes are shifted; non-numeric or unparsable
        // indexes are left untouched.
        if let Expression::Literal(Literal::Number(ref n)) = sub.index {
            if let Ok(val) = n.parse::<i64>() {
                sub.index = Expression::Literal(Literal::Number((val - 1).to_string()));
            }
        }
        Ok(Expression::Subscript(sub))
    } else { Ok(e) }
}
12737
12738 Action::AnyValueIgnoreNulls => {
12739 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
12740 if let Expression::AnyValue(mut av) = e {
12741 if av.ignore_nulls.is_none() {
12742 av.ignore_nulls = Some(true);
12743 }
12744 Ok(Expression::AnyValue(av))
12745 } else { Ok(e) }
12746 }
12747
12748 Action::BigQueryNullsOrdering => {
12749 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
12750 if let Expression::WindowFunction(mut wf) = e {
12751 for o in &mut wf.over.order_by {
12752 o.nulls_first = None;
12753 }
12754 Ok(Expression::WindowFunction(wf))
12755 } else if let Expression::Ordered(mut o) = e {
12756 o.nulls_first = None;
12757 Ok(Expression::Ordered(o))
12758 } else { Ok(e) }
12759 }
12760
12761 Action::SnowflakeFloatProtect => {
12762 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
12763 // Snowflake's target transform from converting it to DOUBLE.
12764 // Non-Snowflake sources should keep their FLOAT spelling.
12765 if let Expression::DataType(DataType::Float { .. }) = e {
12766 Ok(Expression::DataType(DataType::Custom { name: "FLOAT".to_string() }))
12767 } else { Ok(e) }
12768 }
12769
12770 Action::MysqlNullsOrdering => {
12771 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
12772 if let Expression::Ordered(mut o) = e {
12773 let nulls_last = o.nulls_first == Some(false);
12774 let desc = o.desc;
12775 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
12776 // If requested ordering matches default, just strip NULLS clause
12777 let matches_default = if desc {
12778 // DESC default is NULLS FIRST, so nulls_first=true matches
12779 o.nulls_first == Some(true)
12780 } else {
12781 // ASC default is NULLS LAST, so nulls_first=false matches
12782 nulls_last
12783 };
12784 if matches_default {
12785 o.nulls_first = None;
12786 Ok(Expression::Ordered(o))
12787 } else {
12788 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
12789 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
12790 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
12791 let null_val = if desc { 1 } else { 0 };
12792 let non_null_val = if desc { 0 } else { 1 };
12793 let _case_expr = Expression::Case(Box::new(Case {
12794 operand: None,
12795 whens: vec![(
12796 Expression::IsNull(Box::new(crate::expressions::IsNull {
12797 this: o.this.clone(),
12798 not: false,
12799 postfix_form: false,
12800 })),
12801 Expression::number(null_val),
12802 )],
12803 else_: Some(Expression::number(non_null_val)),
12804 }));
12805 o.nulls_first = None;
12806 // Return a tuple of [case_expr, ordered_expr]
12807 // We need to return both as part of the ORDER BY
12808 // But since transform_recursive processes individual expressions,
12809 // we can't easily add extra ORDER BY items here.
12810 // Instead, strip the nulls_first
12811 o.nulls_first = None;
12812 Ok(Expression::Ordered(o))
12813 }
12814 } else { Ok(e) }
12815 }
12816
Action::MysqlNullsLastRewrite => {
    // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
    // to simulate NULLS LAST for ASC ordering
    if let Expression::WindowFunction(mut wf) = e {
        // The ORDER BY list is rebuilt: each ASC key gains a preceding CASE
        // sort key; every key loses its explicit NULLS modifier.
        let mut new_order_by = Vec::new();
        for o in wf.over.order_by {
            if !o.desc {
                // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: o.this.clone(),
                            not: false,
                            postfix_form: false,
                        })),
                        Expression::Literal(Literal::Number("1".to_string())),
                    )],
                    else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
                }));
                new_order_by.push(crate::expressions::Ordered {
                    this: case_expr,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                });
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            } else {
                // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                // No change needed
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            }
        }
        wf.over.order_by = new_order_by;
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}

Action::RespectNullsConvert => {
    // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
    // Only an explicit RESPECT NULLS (ignore_nulls == Some(false)) is cleared;
    // IGNORE NULLS (Some(true)) is left intact.
    if let Expression::WindowFunction(mut wf) = e {
        match &mut wf.this {
            Expression::FirstValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                    // but that's handled by the generator's NULLS ordering
                }
            }
            Expression::LastValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                }
            }
            _ => {}
        }
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}
12882
12883 Action::CreateTableStripComment => {
12884 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
12885 if let Expression::CreateTable(mut ct) = e {
12886 for col in &mut ct.columns {
12887 col.comment = None;
12888 col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Comment(_)));
12889 // Also remove Comment from constraint_order
12890 col.constraint_order.retain(|c| !matches!(c, crate::expressions::ConstraintType::Comment));
12891 }
12892 // Strip properties (USING, PARTITIONED BY, etc.)
12893 ct.properties.clear();
12894 Ok(Expression::CreateTable(ct))
12895 } else { Ok(e) }
12896 }
12897
Action::AlterTableToSpRename => {
    // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
    // Only the first alter action is inspected; anything else passes through.
    if let Expression::AlterTable(ref at) = e {
        if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
            // Build the old table name using TSQL bracket quoting
            // NOTE(review): when either the schema or the table is quoted,
            // brackets are applied to BOTH parts -- confirm this matches the
            // intended TSQL output.
            let old_name = if let Some(ref schema) = at.name.schema {
                if at.name.name.quoted || schema.quoted {
                    format!("[{}].[{}]", schema.name, at.name.name.name)
                } else {
                    format!("{}.{}", schema.name, at.name.name.name)
                }
            } else {
                if at.name.name.quoted {
                    format!("[{}]", at.name.name.name)
                } else {
                    at.name.name.name.clone()
                }
            };
            // sp_rename takes only the bare new name, without a schema prefix.
            let new_name = new_tbl.name.name.clone();
            // EXEC sp_rename 'old_name', 'new_name'
            let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
            Ok(Expression::Raw(crate::expressions::Raw { sql }))
        } else { Ok(e) }
    } else { Ok(e) }
}
12923
Action::SnowflakeIntervalFormat => {
    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
    if let Expression::Interval(mut iv) = e {
        // Only a string-literal value with a simple unit spec is folded; any
        // other shape (expressions, compound specs) is left unchanged.
        if let (Some(Expression::Literal(Literal::String(ref val))), Some(ref unit_spec)) = (&iv.this, &iv.unit) {
            let unit_str = match unit_spec {
                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                    match unit {
                        crate::expressions::IntervalUnit::Year => "YEAR",
                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                        crate::expressions::IntervalUnit::Month => "MONTH",
                        crate::expressions::IntervalUnit::Week => "WEEK",
                        crate::expressions::IntervalUnit::Day => "DAY",
                        crate::expressions::IntervalUnit::Hour => "HOUR",
                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                        crate::expressions::IntervalUnit::Second => "SECOND",
                        crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
                        crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
                    }
                }
                // Non-simple specs produce "" and are skipped below.
                _ => "",
            };
            if !unit_str.is_empty() {
                // Fold the unit into the literal and clear the separate unit
                // field so the generator emits INTERVAL '2 HOUR'.
                let combined = format!("{} {}", val, unit_str);
                iv.this = Some(Expression::Literal(Literal::String(combined)));
                iv.unit = None;
            }
        }
        Ok(Expression::Interval(iv))
    } else { Ok(e) }
}

Action::ArrayConcatBracketConvert => {
    // Expression::Array/ArrayFunc -> target-specific
    // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
    // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
    match e {
        Expression::Array(arr) => {
            if matches!(target, DialectType::Redshift) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY".to_string(), arr.expressions,
                ))))
            } else {
                Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                    expressions: arr.expressions,
                    bracket_notation: false,
                    use_list_keyword: false,
                })))
            }
        }
        Expression::ArrayFunc(arr) => {
            // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
            if matches!(target, DialectType::Redshift) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY".to_string(), arr.expressions,
                ))))
            } else {
                Ok(Expression::ArrayFunc(arr))
            }
        }
        _ => Ok(e),
    }
}
12986
Action::BitAggFloatCast => {
    // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
    // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
    // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
    let int_type = DataType::Int { length: None, integer_spelling: false };
    // Rewrites the aggregate's operand when it is a cast to a floating-point,
    // custom, or decimal type; any other operand is returned untouched.
    let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
        if let Expression::Cast(c) = agg_this {
            match &c.to {
                // NOTE(review): Custom types are deliberately (?) routed down
                // the same ROUND path as FLOAT/DOUBLE -- confirm this is
                // intended for all custom type names.
                DataType::Float { .. } | DataType::Double { .. }
                | DataType::Custom { .. } => {
                    // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
                    // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
                    let inner_type = match &c.to {
                        DataType::Float { precision, scale, .. } => DataType::Float { precision: *precision, scale: *scale, real_spelling: true },
                        other => other.clone(),
                    };
                    let inner_cast = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: c.this.clone(),
                        to: inner_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let rounded = Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![inner_cast])));
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: rounded,
                        to: int_dt,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                }
                DataType::Decimal { .. } => {
                    // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: Expression::Cast(c),
                        to: int_dt,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                }
                _ => Expression::Cast(c),
            }
        } else {
            agg_this
        }
    };
    // All three bitwise aggregates get the same operand rewrite. The And/Xor
    // arms rebuild an identical INT type because the outer `int_type` was
    // moved into the Or arm's call.
    match e {
        Expression::BitwiseOrAgg(mut f) => {
            f.this = wrap_agg(f.this, int_type);
            Ok(Expression::BitwiseOrAgg(f))
        }
        Expression::BitwiseAndAgg(mut f) => {
            let int_type = DataType::Int { length: None, integer_spelling: false };
            f.this = wrap_agg(f.this, int_type);
            Ok(Expression::BitwiseAndAgg(f))
        }
        Expression::BitwiseXorAgg(mut f) => {
            let int_type = DataType::Int { length: None, integer_spelling: false };
            f.this = wrap_agg(f.this, int_type);
            Ok(Expression::BitwiseXorAgg(f))
        }
        _ => Ok(e),
    }
}
13056
13057 Action::BitAggSnowflakeRename => {
13058 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
13059 match e {
13060 Expression::BitwiseOrAgg(f) => {
13061 Ok(Expression::Function(Box::new(Function::new("BITORAGG".to_string(), vec![f.this]))))
13062 }
13063 Expression::BitwiseAndAgg(f) => {
13064 Ok(Expression::Function(Box::new(Function::new("BITANDAGG".to_string(), vec![f.this]))))
13065 }
13066 Expression::BitwiseXorAgg(f) => {
13067 Ok(Expression::Function(Box::new(Function::new("BITXORAGG".to_string(), vec![f.this]))))
13068 }
13069 _ => Ok(e),
13070 }
13071 }
13072
13073 Action::StrftimeCastTimestamp => {
13074 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
13075 if let Expression::Cast(mut c) = e {
13076 if matches!(c.to, DataType::Timestamp { timezone: false, .. }) {
13077 c.to = DataType::Custom { name: "TIMESTAMP_NTZ".to_string() };
13078 }
13079 Ok(Expression::Cast(c))
13080 } else { Ok(e) }
13081 }
13082
13083 Action::DecimalDefaultPrecision => {
13084 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
13085 if let Expression::Cast(mut c) = e {
13086 if matches!(c.to, DataType::Decimal { precision: None, .. }) {
13087 c.to = DataType::Decimal {
13088 precision: Some(18),
13089 scale: Some(3),
13090 };
13091 }
13092 Ok(Expression::Cast(c))
13093 } else { Ok(e) }
13094 }
13095
Action::FilterToIff => {
    // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
    if let Expression::Filter(f) = e {
        let condition = *f.expression;
        let agg = *f.this;
        // Strip WHERE from condition
        let cond = match condition {
            Expression::Where(w) => w.this,
            other => other,
        };
        // Extract the aggregate function and its argument
        // We want AVG(IFF(condition, x, NULL))
        match agg {
            Expression::Function(mut func) => {
                // Generic call: wrap the first argument only; a zero-argument
                // call keeps the original Filter wrapper.
                if !func.args.is_empty() {
                    let orig_arg = func.args[0].clone();
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, orig_arg, Expression::Null(Null)],
                    )));
                    func.args[0] = iff_call;
                    Ok(Expression::Function(func))
                } else {
                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                        this: Box::new(Expression::Function(func)),
                        expression: Box::new(cond),
                    })))
                }
            }
            Expression::Avg(mut avg) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                )));
                avg.this = iff_call;
                Ok(Expression::Avg(avg))
            }
            Expression::Sum(mut s) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, s.this.clone(), Expression::Null(Null)],
                )));
                s.this = iff_call;
                Ok(Expression::Sum(s))
            }
            Expression::Count(mut c) => {
                // COUNT(*) has no operand to wrap; only COUNT(expr) is rewritten.
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
                Ok(Expression::Count(c))
            }
            other => {
                // Fallback: keep as Filter
                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                    this: Box::new(other),
                    expression: Box::new(cond),
                })))
            }
        }
    } else { Ok(e) }
}
13161
13162 Action::AggFilterToIff => {
13163 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
13164 // Helper macro to handle the common AggFunc case
13165 macro_rules! handle_agg_filter_to_iff {
13166 ($variant:ident, $agg:expr) => {{
13167 let mut agg = $agg;
13168 if let Some(filter_cond) = agg.filter.take() {
13169 let iff_call = Expression::Function(Box::new(Function::new(
13170 "IFF".to_string(),
13171 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
13172 )));
13173 agg.this = iff_call;
13174 }
13175 Ok(Expression::$variant(agg))
13176 }};
13177 }
13178
13179 match e {
13180 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
13181 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
13182 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
13183 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
13184 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
13185 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
13186 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
13187 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
13188 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
13189 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
13190 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
13191 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
13192 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
13193 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
13194 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
13195 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
13196 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
13197 Expression::ApproxDistinct(agg) => handle_agg_filter_to_iff!(ApproxDistinct, agg),
13198 Expression::Count(mut c) => {
13199 if let Some(filter_cond) = c.filter.take() {
13200 if let Some(ref this_expr) = c.this {
13201 let iff_call = Expression::Function(Box::new(Function::new(
13202 "IFF".to_string(),
13203 vec![filter_cond, this_expr.clone(), Expression::Null(Null)],
13204 )));
13205 c.this = Some(iff_call);
13206 }
13207 }
13208 Ok(Expression::Count(c))
13209 }
13210 other => Ok(other),
13211 }
13212 }
13213
13214 Action::JsonToGetPath => {
13215 // JSON_EXTRACT(JSON('x'), '$.key') -> GET_PATH(PARSE_JSON('x'), 'key')
13216 if let Expression::JsonExtract(je) = e {
13217 // Convert JSON() to PARSE_JSON()
13218 let this = match &je.this {
13219 Expression::Function(f) if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 => {
13220 Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), f.args.clone())))
13221 }
13222 _ => je.this.clone(),
13223 };
13224 // Convert path: extract key from JSONPath or strip $. prefix from string
13225 let path = match &je.path {
13226 Expression::JSONPath(jp) => {
13227 // Extract the key from JSONPath: $root.key -> 'key'
13228 let mut key_parts = Vec::new();
13229 for expr in &jp.expressions {
13230 match expr {
13231 Expression::JSONPathRoot(_) => {} // skip root
13232 Expression::JSONPathKey(k) => {
13233 if let Expression::Literal(Literal::String(s)) = &*k.this {
13234 key_parts.push(s.clone());
13235 }
13236 }
13237 _ => {}
13238 }
13239 }
13240 if !key_parts.is_empty() {
13241 Expression::Literal(Literal::String(key_parts.join(".")))
13242 } else {
13243 je.path.clone()
13244 }
13245 }
13246 Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
13247 Expression::Literal(Literal::String(s[2..].to_string()))
13248 }
13249 Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
13250 Expression::Literal(Literal::String(s[1..].to_string()))
13251 }
13252 _ => je.path.clone(),
13253 };
13254 Ok(Expression::Function(Box::new(Function::new(
13255 "GET_PATH".to_string(),
13256 vec![this, path],
13257 ))))
13258 } else { Ok(e) }
13259 }
13260
13261 Action::StructToRow => {
13262 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
13263 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
13264
13265 // Extract key-value pairs from either Struct or MapFunc
13266 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
13267 Expression::Struct(s) => {
13268 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13269 if let Some(name) = opt_name {
13270 (name.clone(), field_expr.clone())
13271 } else if let Expression::NamedArgument(na) = field_expr {
13272 (na.name.name.clone(), na.value.clone())
13273 } else {
13274 (String::new(), field_expr.clone())
13275 }
13276 }).collect())
13277 }
13278 Expression::MapFunc(m) if m.curly_brace_syntax => {
13279 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13280 let key_name = match key {
13281 Expression::Literal(Literal::String(s)) => s.clone(),
13282 Expression::Identifier(id) => id.name.clone(),
13283 _ => String::new(),
13284 };
13285 (key_name, value.clone())
13286 }).collect())
13287 }
13288 _ => None,
13289 };
13290
13291 if let Some(pairs) = kv_pairs {
13292 let mut named_args = Vec::new();
13293 for (key_name, value) in pairs {
13294 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
13295 named_args.push(Expression::Alias(Box::new(
13296 crate::expressions::Alias::new(value, Identifier::new(key_name))
13297 )));
13298 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13299 named_args.push(value);
13300 } else {
13301 named_args.push(value);
13302 }
13303 }
13304
13305 if matches!(target, DialectType::BigQuery) {
13306 Ok(Expression::Function(Box::new(Function::new(
13307 "STRUCT".to_string(),
13308 named_args,
13309 ))))
13310 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13311 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
13312 let row_func = Expression::Function(Box::new(Function::new(
13313 "ROW".to_string(),
13314 named_args,
13315 )));
13316
13317 // Try to infer types for each pair
13318 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
13319 Expression::Struct(s) => {
13320 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13321 if let Some(name) = opt_name {
13322 (name.clone(), field_expr.clone())
13323 } else if let Expression::NamedArgument(na) = field_expr {
13324 (na.name.name.clone(), na.value.clone())
13325 } else {
13326 (String::new(), field_expr.clone())
13327 }
13328 }).collect())
13329 }
13330 Expression::MapFunc(m) if m.curly_brace_syntax => {
13331 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13332 let key_name = match key {
13333 Expression::Literal(Literal::String(s)) => s.clone(),
13334 Expression::Identifier(id) => id.name.clone(),
13335 _ => String::new(),
13336 };
13337 (key_name, value.clone())
13338 }).collect())
13339 }
13340 _ => None,
13341 };
13342
13343 if let Some(pairs) = kv_pairs_again {
13344 // Infer types for all values
13345 let mut all_inferred = true;
13346 let mut fields = Vec::new();
13347 for (name, value) in &pairs {
13348 let inferred_type = match value {
13349 Expression::Literal(Literal::Number(n)) => {
13350 if n.contains('.') {
13351 Some(DataType::Double { precision: None, scale: None })
13352 } else {
13353 Some(DataType::Int { length: None, integer_spelling: true })
13354 }
13355 }
13356 Expression::Literal(Literal::String(_)) => {
13357 Some(DataType::VarChar { length: None, parenthesized_length: false })
13358 }
13359 Expression::Boolean(_) => Some(DataType::Boolean),
13360 _ => None,
13361 };
13362 if let Some(dt) = inferred_type {
13363 fields.push(crate::expressions::StructField::new(name.clone(), dt));
13364 } else {
13365 all_inferred = false;
13366 break;
13367 }
13368 }
13369
13370 if all_inferred && !fields.is_empty() {
13371 let row_type = DataType::Struct { fields, nested: true };
13372 Ok(Expression::Cast(Box::new(Cast {
13373 this: row_func,
13374 to: row_type,
13375 trailing_comments: Vec::new(),
13376 double_colon_syntax: false,
13377 format: None,
13378 default: None,
13379 })))
13380 } else {
13381 Ok(row_func)
13382 }
13383 } else {
13384 Ok(row_func)
13385 }
13386 } else {
13387 Ok(Expression::Function(Box::new(Function::new(
13388 "ROW".to_string(),
13389 named_args,
13390 ))))
13391 }
13392 } else { Ok(e) }
13393 }
13394
13395 Action::SparkStructConvert => {
13396 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
13397 // or DuckDB {'name': val, ...}
13398 if let Expression::Function(f) = e {
13399 // Extract name-value pairs from aliased args
13400 let mut pairs: Vec<(String, Expression)> = Vec::new();
13401 for arg in &f.args {
13402 match arg {
13403 Expression::Alias(a) => {
13404 pairs.push((a.alias.name.clone(), a.this.clone()));
13405 }
13406 _ => {
13407 pairs.push((String::new(), arg.clone()));
13408 }
13409 }
13410 }
13411
13412 match target {
13413 DialectType::DuckDB => {
13414 // Convert to DuckDB struct literal {'name': value, ...}
13415 let mut keys = Vec::new();
13416 let mut values = Vec::new();
13417 for (name, value) in &pairs {
13418 keys.push(Expression::Literal(Literal::String(name.clone())));
13419 values.push(value.clone());
13420 }
13421 Ok(Expression::MapFunc(Box::new(crate::expressions::MapConstructor {
13422 keys,
13423 values,
13424 curly_brace_syntax: true,
13425 with_map_keyword: false,
13426 })))
13427 }
13428 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13429 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
13430 let row_args: Vec<Expression> = pairs.iter().map(|(_, v)| v.clone()).collect();
13431 let row_func = Expression::Function(Box::new(Function::new(
13432 "ROW".to_string(), row_args,
13433 )));
13434
13435 // Infer types
13436 let mut all_inferred = true;
13437 let mut fields = Vec::new();
13438 for (name, value) in &pairs {
13439 let inferred_type = match value {
13440 Expression::Literal(Literal::Number(n)) => {
13441 if n.contains('.') {
13442 Some(DataType::Double { precision: None, scale: None })
13443 } else {
13444 Some(DataType::Int { length: None, integer_spelling: true })
13445 }
13446 }
13447 Expression::Literal(Literal::String(_)) => {
13448 Some(DataType::VarChar { length: None, parenthesized_length: false })
13449 }
13450 Expression::Boolean(_) => Some(DataType::Boolean),
13451 _ => None,
13452 };
13453 if let Some(dt) = inferred_type {
13454 fields.push(crate::expressions::StructField::new(name.clone(), dt));
13455 } else {
13456 all_inferred = false;
13457 break;
13458 }
13459 }
13460
13461 if all_inferred && !fields.is_empty() {
13462 let row_type = DataType::Struct { fields, nested: true };
13463 Ok(Expression::Cast(Box::new(Cast {
13464 this: row_func,
13465 to: row_type,
13466 trailing_comments: Vec::new(),
13467 double_colon_syntax: false,
13468 format: None,
13469 default: None,
13470 })))
13471 } else {
13472 Ok(row_func)
13473 }
13474 }
13475 _ => Ok(Expression::Function(f)),
13476 }
13477 } else { Ok(e) }
13478 }
13479
13480 Action::ApproxCountDistinctToApproxDistinct => {
13481 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
13482 if let Expression::ApproxCountDistinct(f) = e {
13483 Ok(Expression::ApproxDistinct(f))
13484 } else {
13485 Ok(e)
13486 }
13487 }
13488
13489 Action::CollectListToArrayAgg => {
13490 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
13491 if let Expression::AggregateFunction(f) = e {
13492 let filter_expr = if !f.args.is_empty() {
13493 let arg = f.args[0].clone();
13494 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
13495 this: arg,
13496 not: true,
13497 postfix_form: false,
13498 })))
13499 } else {
13500 None
13501 };
13502 let agg = crate::expressions::AggFunc {
13503 this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
13504 distinct: f.distinct,
13505 order_by: f.order_by.clone(),
13506 filter: filter_expr,
13507 ignore_nulls: None,
13508 name: None,
13509 having_max: None,
13510 limit: None,
13511 };
13512 Ok(Expression::ArrayAgg(Box::new(agg)))
13513 } else {
13514 Ok(e)
13515 }
13516 }
13517
13518 Action::CollectSetConvert => {
13519 // COLLECT_SET(x) -> target-specific
13520 if let Expression::AggregateFunction(f) = e {
13521 match target {
13522 DialectType::Presto => {
13523 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13524 name: "SET_AGG".to_string(),
13525 args: f.args,
13526 distinct: false,
13527 order_by: f.order_by,
13528 filter: f.filter,
13529 limit: f.limit,
13530 ignore_nulls: f.ignore_nulls,
13531 })))
13532 }
13533 DialectType::Snowflake => {
13534 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13535 name: "ARRAY_UNIQUE_AGG".to_string(),
13536 args: f.args,
13537 distinct: false,
13538 order_by: f.order_by,
13539 filter: f.filter,
13540 limit: f.limit,
13541 ignore_nulls: f.ignore_nulls,
13542 })))
13543 }
13544 DialectType::Trino | DialectType::DuckDB => {
13545 let agg = crate::expressions::AggFunc {
13546 this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
13547 distinct: true,
13548 order_by: Vec::new(),
13549 filter: None,
13550 ignore_nulls: None,
13551 name: None,
13552 having_max: None,
13553 limit: None,
13554 };
13555 Ok(Expression::ArrayAgg(Box::new(agg)))
13556 }
13557 _ => Ok(Expression::AggregateFunction(f))
13558 }
13559 } else {
13560 Ok(e)
13561 }
13562 }
13563
13564 Action::PercentileConvert => {
13565 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
13566 if let Expression::AggregateFunction(f) = e {
13567 let name = match target {
13568 DialectType::DuckDB => "QUANTILE",
13569 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
13570 _ => "PERCENTILE",
13571 };
13572 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13573 name: name.to_string(),
13574 args: f.args,
13575 distinct: f.distinct,
13576 order_by: f.order_by,
13577 filter: f.filter,
13578 limit: f.limit,
13579 ignore_nulls: f.ignore_nulls,
13580 })))
13581 } else {
13582 Ok(e)
13583 }
13584 }
13585
13586 Action::CorrIsnanWrap => {
13587 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
13588 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
13589 let corr_clone = e.clone();
13590 let isnan = Expression::Function(Box::new(Function::new(
13591 "ISNAN".to_string(), vec![corr_clone.clone()],
13592 )));
13593 let case_expr = Expression::Case(Box::new(Case {
13594 operand: None,
13595 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
13596 else_: Some(corr_clone),
13597 }));
13598 Ok(case_expr)
13599 }
13600
13601 Action::TruncToDateTrunc => {
13602 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
13603 if let Expression::Function(f) = e {
13604 if f.args.len() == 2 {
13605 let timestamp = f.args[0].clone();
13606 let unit_expr = f.args[1].clone();
13607
13608 if matches!(target, DialectType::ClickHouse) {
13609 // For ClickHouse, produce Expression::DateTrunc which the generator
13610 // outputs as DATE_TRUNC(...) without going through the ClickHouse
13611 // target transform that would convert it to dateTrunc
13612 let unit_str = Self::get_unit_str_static(&unit_expr);
13613 let dt_field = match unit_str.as_str() {
13614 "YEAR" => DateTimeField::Year,
13615 "MONTH" => DateTimeField::Month,
13616 "DAY" => DateTimeField::Day,
13617 "HOUR" => DateTimeField::Hour,
13618 "MINUTE" => DateTimeField::Minute,
13619 "SECOND" => DateTimeField::Second,
13620 "WEEK" => DateTimeField::Week,
13621 "QUARTER" => DateTimeField::Quarter,
13622 _ => DateTimeField::Custom(unit_str),
13623 };
13624 Ok(Expression::DateTrunc(Box::new(crate::expressions::DateTruncFunc {
13625 this: timestamp,
13626 unit: dt_field,
13627 })))
13628 } else {
13629 let new_args = vec![unit_expr, timestamp];
13630 Ok(Expression::Function(Box::new(Function::new("DATE_TRUNC".to_string(), new_args))))
13631 }
13632 } else {
13633 Ok(Expression::Function(f))
13634 }
13635 } else {
13636 Ok(e)
13637 }
13638 }
13639
13640 Action::ArrayContainsConvert => {
13641 if let Expression::ArrayContains(f) = e {
13642 match target {
13643 DialectType::Presto | DialectType::Trino => {
13644 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
13645 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), vec![f.this, f.expression]))))
13646 }
13647 DialectType::Snowflake => {
13648 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
13649 let cast_val = Expression::Cast(Box::new(crate::expressions::Cast {
13650 this: f.expression,
13651 to: crate::expressions::DataType::Custom { name: "VARIANT".to_string() },
13652 trailing_comments: Vec::new(),
13653 double_colon_syntax: false,
13654 format: None,
13655 default: None,
13656 }));
13657 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), vec![cast_val, f.this]))))
13658 }
13659 _ => Ok(Expression::ArrayContains(f))
13660 }
13661 } else {
13662 Ok(e)
13663 }
13664 }
13665
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    //
    // Idea: search in the suffix starting at `pos`, then shift the hit back
    // into the original string's coordinates (+ pos - 1), keeping 0 as the
    // "not found" result.
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition { this, substr, position, occurrence } = *sp;
        let string = *this;
        // Missing substring becomes a NULL placeholder.
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Default start position is 1.
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(), vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1
        let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                strpos_call.clone(),
                pos.clone(),
            ))),
            Expression::number(1),
        )));
        // STRPOS(...) = 0
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (is_zero, Expression::number(0)),
                    ],
                    else_: Some(pos_adjusted),
                })))
            }
            _ => {
                // Reconstruct StrPosition (the destructuring above consumed it).
                // NOTE(review): a missing substr is re-wrapped as Some(NULL)
                // rather than None, and `occurrence` is carried through but
                // not represented in the expansions above — confirm intended.
                Ok(Expression::StrPosition(Box::new(crate::expressions::StrPosition {
                    this: Box::new(string),
                    substr: Some(Box::new(substr_expr)),
                    position: Some(Box::new(pos)),
                    occurrence,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
13737
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) -> target-specific month difference.
    if let Expression::MonthsBetween(mb) = e {
        // BinaryFunc layout: `this` = end date, `expression` = start date.
        let crate::expressions::BinaryFunc { this: end_date, expression: start_date, .. } = *mb;
        match target {
            DialectType::DuckDB => {
                // Emulate a fractional month count:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //     THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                // Whole months plus a /31.0 fractional part, except when both
                // dates are the last day of their month (fraction is 0).
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                // Whole-month difference.
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
                )));
                // Day-of-month of each endpoint.
                let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
                let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
                // Day-of-month of each endpoint's month-end.
                let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
                let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
                let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
                let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
                // Both endpoints fall on their month's last day?
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
                let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                // Fractional part: (DAY(end) - DAY(start)) / 31.0, with the
                // subtraction parenthesized so precedence survives generation.
                let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — whole months only.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(), vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — unit as a string literal.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            _ => {
                // Other targets: rebuild the MonthsBetween node unchanged.
                Ok(Expression::MonthsBetween(Box::new(crate::expressions::BinaryFunc {
                    this: end_date, expression: start_date, original_name: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
13794
13795 Action::AddMonthsConvert => {
13796 if let Expression::AddMonths(am) = e {
13797 let date = am.this;
13798 let val = am.expression;
13799 match target {
13800 DialectType::TSQL | DialectType::Fabric => {
13801 let cast_date = Self::ensure_cast_datetime2(date);
13802 Ok(Expression::Function(Box::new(Function::new(
13803 "DATEADD".to_string(), vec![
13804 Expression::Identifier(Identifier::new("MONTH")),
13805 val, cast_date,
13806 ],
13807 ))))
13808 }
13809 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
13810 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
13811 // Optionally wrapped in CAST(... AS type) if the input had a specific type
13812
13813 // Determine the cast type from the date expression
13814 let (cast_date, return_type) = match &date {
13815 Expression::Literal(Literal::String(_)) => {
13816 // String literal: CAST(str AS TIMESTAMP), no outer CAST
13817 (Expression::Cast(Box::new(Cast {
13818 this: date.clone(), to: DataType::Timestamp { precision: None, timezone: false },
13819 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13820 })), None)
13821 }
13822 Expression::Cast(c) => {
13823 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
13824 (date.clone(), Some(c.to.clone()))
13825 }
13826 _ => {
13827 // Expression or NULL::TYPE - keep as-is, check for cast type
13828 if let Expression::Cast(c) = &date {
13829 (date.clone(), Some(c.to.clone()))
13830 } else {
13831 (date.clone(), None)
13832 }
13833 }
13834 };
13835
13836 // Build the interval expression
13837 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
13838 // For integer values, use INTERVAL val MONTH
13839 let is_non_integer_val = match &val {
13840 Expression::Literal(Literal::Number(n)) => n.contains('.'),
13841 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
13842 Expression::Neg(n) => {
13843 if let Expression::Literal(Literal::Number(s)) = &n.this {
13844 s.contains('.')
13845 } else { false }
13846 }
13847 _ => false,
13848 };
13849
13850 let add_interval = if is_non_integer_val {
13851 // TO_MONTHS(CAST(ROUND(val) AS INT))
13852 let round_val = Expression::Function(Box::new(Function::new(
13853 "ROUND".to_string(), vec![val.clone()],
13854 )));
13855 let cast_int = Expression::Cast(Box::new(Cast {
13856 this: round_val, to: DataType::Int { length: None, integer_spelling: false },
13857 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13858 }));
13859 Expression::Function(Box::new(Function::new(
13860 "TO_MONTHS".to_string(), vec![cast_int],
13861 )))
13862 } else {
13863 // INTERVAL val MONTH
13864 // For negative numbers, wrap in parens
13865 let interval_val = match &val {
13866 Expression::Literal(Literal::Number(n)) if n.starts_with('-') => {
13867 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13868 }
13869 Expression::Neg(_) => {
13870 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13871 }
13872 Expression::Null(_) => {
13873 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13874 }
13875 _ => val.clone(),
13876 };
13877 Expression::Interval(Box::new(crate::expressions::Interval {
13878 this: Some(interval_val),
13879 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13880 unit: crate::expressions::IntervalUnit::Month,
13881 use_plural: false,
13882 }),
13883 }))
13884 };
13885
13886 // Build: date + interval
13887 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
13888 cast_date.clone(), add_interval.clone(),
13889 )));
13890
13891 // Build LAST_DAY(date)
13892 let last_day_date = Expression::Function(Box::new(Function::new(
13893 "LAST_DAY".to_string(), vec![cast_date.clone()],
13894 )));
13895
13896 // Build LAST_DAY(date + interval)
13897 let last_day_date_plus = Expression::Function(Box::new(Function::new(
13898 "LAST_DAY".to_string(), vec![date_plus_interval.clone()],
13899 )));
13900
13901 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
13902 let case_expr = Expression::Case(Box::new(Case {
13903 operand: None,
13904 whens: vec![(
13905 Expression::Eq(Box::new(BinaryOp::new(
13906 last_day_date, cast_date.clone(),
13907 ))),
13908 last_day_date_plus,
13909 )],
13910 else_: Some(date_plus_interval),
13911 }));
13912
13913 // Wrap in CAST(... AS type) if needed
13914 if let Some(dt) = return_type {
13915 Ok(Expression::Cast(Box::new(Cast {
13916 this: case_expr, to: dt,
13917 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13918 })))
13919 } else {
13920 Ok(case_expr)
13921 }
13922 }
13923 DialectType::DuckDB => {
13924 // Non-Snowflake source: simple date + INTERVAL
13925 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13926 Expression::Cast(Box::new(Cast {
13927 this: date, to: DataType::Timestamp { precision: None, timezone: false },
13928 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13929 }))
13930 } else { date };
13931 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13932 this: Some(val),
13933 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13934 unit: crate::expressions::IntervalUnit::Month,
13935 use_plural: false,
13936 }),
13937 }));
13938 Ok(Expression::Add(Box::new(BinaryOp::new(cast_date, interval))))
13939 }
13940 DialectType::Snowflake => {
13941 // Keep ADD_MONTHS when source is also Snowflake
13942 if matches!(source, DialectType::Snowflake) {
13943 Ok(Expression::Function(Box::new(Function::new(
13944 "ADD_MONTHS".to_string(), vec![date, val],
13945 ))))
13946 } else {
13947 Ok(Expression::Function(Box::new(Function::new(
13948 "DATEADD".to_string(), vec![
13949 Expression::Identifier(Identifier::new("MONTH")),
13950 val, date,
13951 ],
13952 ))))
13953 }
13954 }
13955 DialectType::Redshift => {
13956 Ok(Expression::Function(Box::new(Function::new(
13957 "DATEADD".to_string(), vec![
13958 Expression::Identifier(Identifier::new("MONTH")),
13959 val, date,
13960 ],
13961 ))))
13962 }
13963 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13964 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13965 Expression::Cast(Box::new(Cast {
13966 this: date, to: DataType::Timestamp { precision: None, timezone: false },
13967 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13968 }))
13969 } else { date };
13970 Ok(Expression::Function(Box::new(Function::new(
13971 "DATE_ADD".to_string(), vec![
13972 Expression::string("MONTH"),
13973 val, cast_date,
13974 ],
13975 ))))
13976 }
13977 DialectType::BigQuery => {
13978 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13979 this: Some(val),
13980 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13981 unit: crate::expressions::IntervalUnit::Month,
13982 use_plural: false,
13983 }),
13984 }));
13985 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13986 Expression::Cast(Box::new(Cast {
13987 this: date, to: DataType::Custom { name: "DATETIME".to_string() },
13988 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13989 }))
13990 } else { date };
13991 Ok(Expression::Function(Box::new(Function::new(
13992 "DATE_ADD".to_string(), vec![cast_date, interval],
13993 ))))
13994 }
13995 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
13996 Ok(Expression::Function(Box::new(Function::new(
13997 "ADD_MONTHS".to_string(), vec![date, val],
13998 ))))
13999 }
14000 _ => {
14001 // Default: keep as AddMonths expression
14002 Ok(Expression::AddMonths(Box::new(crate::expressions::BinaryFunc {
14003 this: date, expression: val, original_name: None,
14004 })))
14005 }
14006 }
14007 } else {
14008 Ok(e)
14009 }
14010 }
14011
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column.
        // The percentile defaults to 0.5 when the call has no argument.
        // NOTE(review): _is_disc is computed but currently unused —
        // PERCENTILE_DISC maps to the same approximate function; confirm
        // this is intended.
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => {
                (pc.percentile.clone(), false)
            }
            // Unrecognized inner expression: leave the WithinGroup untouched.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // The ordered column; falls back to the literal 1 when the ORDER BY
        // list is empty.
        let col = wg.order_by.first().map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(), vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
14048
14049 Action::CurrentUserSparkParens => {
14050 // CURRENT_USER -> CURRENT_USER() for Spark
14051 if let Expression::CurrentUser(_) = e {
14052 Ok(Expression::Function(Box::new(Function::new("CURRENT_USER".to_string(), vec![]))))
14053 } else {
14054 Ok(e)
14055 }
14056 }
14057
14058 Action::SparkDateFuncCast => {
14059 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
14060 let cast_arg = |arg: Expression| -> Expression {
14061 match target {
14062 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14063 Self::double_cast_timestamp_date(arg)
14064 }
14065 _ => {
14066 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
14067 Self::ensure_cast_date(arg)
14068 }
14069 }
14070 };
14071 match e {
14072 Expression::Month(f) => {
14073 Ok(Expression::Month(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14074 }
14075 Expression::Year(f) => {
14076 Ok(Expression::Year(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14077 }
14078 Expression::Day(f) => {
14079 Ok(Expression::Day(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
14080 }
14081 other => Ok(other),
14082 }
14083 }
14084
14085 Action::MapFromArraysConvert => {
14086 // Expression::MapFromArrays -> target-specific
14087 if let Expression::MapFromArrays(mfa) = e {
14088 let keys = mfa.this;
14089 let values = mfa.expression;
14090 match target {
14091 DialectType::Snowflake => {
14092 Ok(Expression::Function(Box::new(Function::new(
14093 "OBJECT_CONSTRUCT".to_string(),
14094 vec![keys, values],
14095 ))))
14096 }
14097 _ => {
14098 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
14099 Ok(Expression::Function(Box::new(Function::new(
14100 "MAP".to_string(),
14101 vec![keys, values],
14102 ))))
14103 }
14104 }
14105 } else {
14106 Ok(e)
14107 }
14108 }
14109
14110 Action::AnyToExists => {
14111 if let Expression::Any(q) = e {
14112 if let Some(op) = q.op.clone() {
14113 let lambda_param = crate::expressions::Identifier::new("x");
14114 let rhs = Expression::Identifier(lambda_param.clone());
14115 let body = match op {
14116 crate::expressions::QuantifiedOp::Eq => Expression::Eq(Box::new(BinaryOp::new(q.this, rhs))),
14117 crate::expressions::QuantifiedOp::Neq => Expression::Neq(Box::new(BinaryOp::new(q.this, rhs))),
14118 crate::expressions::QuantifiedOp::Lt => Expression::Lt(Box::new(BinaryOp::new(q.this, rhs))),
14119 crate::expressions::QuantifiedOp::Lte => Expression::Lte(Box::new(BinaryOp::new(q.this, rhs))),
14120 crate::expressions::QuantifiedOp::Gt => Expression::Gt(Box::new(BinaryOp::new(q.this, rhs))),
14121 crate::expressions::QuantifiedOp::Gte => Expression::Gte(Box::new(BinaryOp::new(q.this, rhs))),
14122 };
14123 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14124 parameters: vec![lambda_param],
14125 body,
14126 colon: false,
14127 parameter_types: Vec::new(),
14128 }));
14129 Ok(Expression::Function(Box::new(Function::new(
14130 "EXISTS".to_string(),
14131 vec![q.subquery, lambda],
14132 ))))
14133 } else {
14134 Ok(Expression::Any(q))
14135 }
14136 } else {
14137 Ok(e)
14138 }
14139 }
14140
14141 Action::GenerateSeriesConvert => {
14142 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
14143 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
14144 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
14145 if let Expression::Function(f) = e {
14146 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
14147 let start = f.args[0].clone();
14148 let end = f.args[1].clone();
14149 let step = f.args.get(2).cloned();
14150
14151 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
14152 let step = step.map(|s| Self::normalize_interval_string(s, target));
14153
14154 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
14155 let maybe_cast_timestamp = |arg: Expression| -> Expression {
14156 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
14157 | DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
14158 match &arg {
14159 Expression::CurrentTimestamp(_) => {
14160 Expression::Cast(Box::new(Cast {
14161 this: arg,
14162 to: DataType::Timestamp { precision: None, timezone: false },
14163 trailing_comments: Vec::new(),
14164 double_colon_syntax: false,
14165 format: None,
14166 default: None,
14167 }))
14168 }
14169 _ => arg,
14170 }
14171 } else {
14172 arg
14173 }
14174 };
14175
14176 let start = maybe_cast_timestamp(start);
14177 let end = maybe_cast_timestamp(end);
14178
14179 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
14180 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
14181 let mut gs_args = vec![start, end];
14182 if let Some(step) = step {
14183 gs_args.push(step);
14184 }
14185 return Ok(Expression::Function(Box::new(Function::new(
14186 "GENERATE_SERIES".to_string(), gs_args,
14187 ))));
14188 }
14189
14190 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
14191 if matches!(target, DialectType::DuckDB) {
14192 let mut gs_args = vec![start, end];
14193 if let Some(step) = step {
14194 gs_args.push(step);
14195 }
14196 let gs = Expression::Function(Box::new(Function::new(
14197 "GENERATE_SERIES".to_string(), gs_args,
14198 )));
14199 return Ok(Expression::Function(Box::new(Function::new(
14200 "UNNEST".to_string(), vec![gs],
14201 ))));
14202 }
14203
14204 let mut seq_args = vec![start, end];
14205 if let Some(step) = step {
14206 seq_args.push(step);
14207 }
14208
14209 let seq = Expression::Function(Box::new(Function::new(
14210 "SEQUENCE".to_string(), seq_args,
14211 )));
14212
14213 match target {
14214 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14215 // Wrap in UNNEST
14216 Ok(Expression::Function(Box::new(Function::new(
14217 "UNNEST".to_string(), vec![seq],
14218 ))))
14219 }
14220 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
14221 // Wrap in EXPLODE
14222 Ok(Expression::Function(Box::new(Function::new(
14223 "EXPLODE".to_string(), vec![seq],
14224 ))))
14225 }
14226 _ => {
14227 // Just SEQUENCE for others
14228 Ok(seq)
14229 }
14230 }
14231 } else {
14232 Ok(Expression::Function(f))
14233 }
14234 } else {
14235 Ok(e)
14236 }
14237 }
14238
14239 Action::ConcatCoalesceWrap => {
14240 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
14241 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
14242 if let Expression::Function(f) = e {
14243 if f.name.eq_ignore_ascii_case("CONCAT") {
14244 let new_args: Vec<Expression> = f.args.into_iter().map(|arg| {
14245 let cast_arg = if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
14246 Expression::Cast(Box::new(Cast {
14247 this: arg, to: DataType::VarChar { length: None, parenthesized_length: false },
14248 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14249 }))
14250 } else {
14251 arg
14252 };
14253 Expression::Function(Box::new(Function::new(
14254 "COALESCE".to_string(), vec![cast_arg, Expression::string("")],
14255 )))
14256 }).collect();
14257 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args))))
14258 } else {
14259 Ok(Expression::Function(f))
14260 }
14261 } else {
14262 Ok(e)
14263 }
14264 }
14265
14266 Action::PipeConcatToConcat => {
14267 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
14268 if let Expression::Concat(op) = e {
14269 let cast_left = Expression::Cast(Box::new(Cast {
14270 this: op.left, to: DataType::VarChar { length: None, parenthesized_length: false },
14271 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14272 }));
14273 let cast_right = Expression::Cast(Box::new(Cast {
14274 this: op.right, to: DataType::VarChar { length: None, parenthesized_length: false },
14275 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14276 }));
14277 Ok(Expression::Function(Box::new(Function::new(
14278 "CONCAT".to_string(), vec![cast_left, cast_right],
14279 ))))
14280 } else {
14281 Ok(e)
14282 }
14283 }
14284
14285 Action::DivFuncConvert => {
14286 // DIV(a, b) -> target-specific integer division
14287 if let Expression::Function(f) = e {
14288 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
14289 let a = f.args[0].clone();
14290 let b = f.args[1].clone();
14291 match target {
14292 DialectType::DuckDB => {
14293 // DIV(a, b) -> CAST(a // b AS DECIMAL)
14294 let int_div = Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
14295 this: a, expression: b, original_name: None,
14296 }));
14297 Ok(Expression::Cast(Box::new(Cast {
14298 this: int_div, to: DataType::Decimal { precision: None, scale: None },
14299 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14300 })))
14301 }
14302 DialectType::BigQuery => {
14303 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
14304 let div_func = Expression::Function(Box::new(Function::new(
14305 "DIV".to_string(), vec![a, b],
14306 )));
14307 Ok(Expression::Cast(Box::new(Cast {
14308 this: div_func, to: DataType::Custom { name: "NUMERIC".to_string() },
14309 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14310 })))
14311 }
14312 DialectType::SQLite => {
14313 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
14314 let cast_a = Expression::Cast(Box::new(Cast {
14315 this: a, to: DataType::Custom { name: "REAL".to_string() },
14316 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14317 }));
14318 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
14319 let cast_int = Expression::Cast(Box::new(Cast {
14320 this: div, to: DataType::Int { length: None, integer_spelling: true },
14321 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14322 }));
14323 Ok(Expression::Cast(Box::new(Cast {
14324 this: cast_int, to: DataType::Custom { name: "REAL".to_string() },
14325 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14326 })))
14327 }
14328 _ => Ok(Expression::Function(f)),
14329 }
14330 } else {
14331 Ok(Expression::Function(f))
14332 }
14333 } else {
14334 Ok(e)
14335 }
14336 }
14337
14338 Action::JsonObjectAggConvert => {
14339 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
14340 match e {
14341 Expression::Function(f) => {
14342 Ok(Expression::Function(Box::new(Function::new(
14343 "JSON_GROUP_OBJECT".to_string(), f.args,
14344 ))))
14345 }
14346 Expression::AggregateFunction(af) => {
14347 // AggregateFunction stores all args in the `args` vec
14348 Ok(Expression::Function(Box::new(Function::new(
14349 "JSON_GROUP_OBJECT".to_string(), af.args,
14350 ))))
14351 }
14352 other => Ok(other),
14353 }
14354 }
14355
14356 Action::JsonbExistsConvert => {
14357 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
14358 if let Expression::Function(f) = e {
14359 if f.args.len() == 2 {
14360 let json_expr = f.args[0].clone();
14361 let key = match &f.args[1] {
14362 Expression::Literal(crate::expressions::Literal::String(s)) => format!("$.{}", s),
14363 _ => return Ok(Expression::Function(f)),
14364 };
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "JSON_EXISTS".to_string(), vec![json_expr, Expression::string(&key)],
14367 ))))
14368 } else {
14369 Ok(Expression::Function(f))
14370 }
14371 } else {
14372 Ok(e)
14373 }
14374 }
14375
14376 Action::DateBinConvert => {
14377 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
14378 if let Expression::Function(f) = e {
14379 Ok(Expression::Function(Box::new(Function::new(
14380 "TIME_BUCKET".to_string(), f.args,
14381 ))))
14382 } else {
14383 Ok(e)
14384 }
14385 }
14386
14387 Action::MysqlCastCharToText => {
14388 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
14389 if let Expression::Cast(mut c) = e {
14390 c.to = DataType::Text;
14391 Ok(Expression::Cast(c))
14392 } else {
14393 Ok(e)
14394 }
14395 }
14396
14397 Action::SparkCastVarcharToString => {
14398 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
14399 match e {
14400 Expression::Cast(mut c) => {
14401 c.to = Self::normalize_varchar_to_string(c.to);
14402 Ok(Expression::Cast(c))
14403 }
14404 Expression::TryCast(mut c) => {
14405 c.to = Self::normalize_varchar_to_string(c.to);
14406 Ok(Expression::TryCast(c))
14407 }
14408 _ => Ok(e),
14409 }
14410 }
14411
14412 Action::MinMaxToLeastGreatest => {
14413 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
14414 if let Expression::Function(f) = e {
14415 let name = f.name.to_uppercase();
14416 let new_name = match name.as_str() {
14417 "MIN" => "LEAST",
14418 "MAX" => "GREATEST",
14419 _ => return Ok(Expression::Function(f)),
14420 };
14421 Ok(Expression::Function(Box::new(Function::new(
14422 new_name.to_string(),
14423 f.args,
14424 ))))
14425 } else {
14426 Ok(e)
14427 }
14428 }
14429
14430 Action::ClickHouseUniqToApproxCountDistinct => {
14431 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
14432 if let Expression::Function(f) = e {
14433 Ok(Expression::Function(Box::new(Function::new(
14434 "APPROX_COUNT_DISTINCT".to_string(),
14435 f.args,
14436 ))))
14437 } else {
14438 Ok(e)
14439 }
14440 }
14441
14442 Action::ClickHouseAnyToAnyValue => {
14443 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
14444 if let Expression::Function(f) = e {
14445 Ok(Expression::Function(Box::new(Function::new(
14446 "ANY_VALUE".to_string(),
14447 f.args,
14448 ))))
14449 } else {
14450 Ok(e)
14451 }
14452 }
14453
14454 Action::OracleVarchar2ToVarchar => {
14455 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
14456 if let Expression::DataType(DataType::Custom { ref name }) = e {
14457 let upper = name.to_uppercase();
14458 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
14459 let inner = if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
14460 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
14461 let end = name.len() - 1; // skip trailing ")"
14462 Some(&name[start..end])
14463 } else {
14464 Option::None
14465 };
14466 if let Some(inner_str) = inner {
14467 // Parse the number part, ignoring BYTE/CHAR qualifier
14468 let num_str = inner_str.split_whitespace().next().unwrap_or("");
14469 if let Ok(n) = num_str.parse::<u32>() {
14470 Ok(Expression::DataType(DataType::VarChar { length: Some(n), parenthesized_length: false }))
14471 } else {
14472 Ok(e)
14473 }
14474 } else {
14475 // Plain VARCHAR2 / NVARCHAR2 without parens
14476 Ok(Expression::DataType(DataType::VarChar { length: Option::None, parenthesized_length: false }))
14477 }
14478 } else {
14479 Ok(e)
14480 }
14481 }
14482
14483 }
14484 })
14485 }
14486
14487 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
14488 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
14489 use crate::expressions::DataType;
14490 match dt {
14491 DataType::VarChar { .. } | DataType::Char { .. } => true,
14492 DataType::Struct { fields, .. } => fields.iter().any(|f| Self::has_varchar_char_type(&f.data_type)),
14493 _ => false,
14494 }
14495 }
14496
14497 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
14498 fn normalize_varchar_to_string(dt: crate::expressions::DataType) -> crate::expressions::DataType {
14499 use crate::expressions::DataType;
14500 match dt {
14501 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom { name: "STRING".to_string() },
14502 DataType::Struct { fields, nested } => {
14503 let fields = fields.into_iter().map(|mut f| {
14504 f.data_type = Self::normalize_varchar_to_string(f.data_type);
14505 f
14506 }).collect();
14507 DataType::Struct { fields, nested }
14508 }
14509 other => other,
14510 }
14511 }
14512
14513 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
14514 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
14515 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
14516 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
14517 let trimmed = s.trim();
14518
14519 // Find where digits end and unit text begins
14520 let digit_end = trimmed.find(|c: char| !c.is_ascii_digit()).unwrap_or(trimmed.len());
14521 if digit_end == 0 || digit_end == trimmed.len() {
14522 return expr;
14523 }
14524 let num = &trimmed[..digit_end];
14525 let unit_text = trimmed[digit_end..].trim().to_uppercase();
14526 if unit_text.is_empty() {
14527 return expr;
14528 }
14529
14530 let known_units = ["DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS"];
14531 if !known_units.contains(&unit_text.as_str()) {
14532 return expr;
14533 }
14534
14535 let unit_str = unit_text.clone();
14536 // Singularize
14537 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
14538 &unit_str[..unit_str.len()-1]
14539 } else {
14540 &unit_str
14541 };
14542 let unit = unit_singular;
14543
14544 match target {
14545 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14546 // INTERVAL '2' DAY
14547 let iu = match unit {
14548 "DAY" => crate::expressions::IntervalUnit::Day,
14549 "HOUR" => crate::expressions::IntervalUnit::Hour,
14550 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14551 "SECOND" => crate::expressions::IntervalUnit::Second,
14552 "WEEK" => crate::expressions::IntervalUnit::Week,
14553 "MONTH" => crate::expressions::IntervalUnit::Month,
14554 "YEAR" => crate::expressions::IntervalUnit::Year,
14555 _ => return expr,
14556 };
14557 return Expression::Interval(Box::new(crate::expressions::Interval {
14558 this: Some(Expression::string(num)),
14559 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14560 unit: iu,
14561 use_plural: false,
14562 }),
14563 }));
14564 }
14565 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
14566 // INTERVAL '2 DAYS'
14567 let plural = if num != "1" && !unit_str.ends_with('S') {
14568 format!("{} {}S", num, unit)
14569 } else if unit_str.ends_with('S') {
14570 format!("{} {}", num, unit_str)
14571 } else {
14572 format!("{} {}", num, unit)
14573 };
14574 return Expression::Interval(Box::new(crate::expressions::Interval {
14575 this: Some(Expression::string(&plural)),
14576 unit: None,
14577 }));
14578 }
14579 _ => {
14580 // Spark/Databricks/Hive: INTERVAL '1' DAY
14581 let iu = match unit {
14582 "DAY" => crate::expressions::IntervalUnit::Day,
14583 "HOUR" => crate::expressions::IntervalUnit::Hour,
14584 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14585 "SECOND" => crate::expressions::IntervalUnit::Second,
14586 "WEEK" => crate::expressions::IntervalUnit::Week,
14587 "MONTH" => crate::expressions::IntervalUnit::Month,
14588 "YEAR" => crate::expressions::IntervalUnit::Year,
14589 _ => return expr,
14590 };
14591 return Expression::Interval(Box::new(crate::expressions::Interval {
14592 this: Some(Expression::string(num)),
14593 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14594 unit: iu,
14595 use_plural: false,
14596 }),
14597 }));
14598 }
14599 }
14600 }
14601 // If it's already an INTERVAL expression, pass through
14602 expr
14603 }
14604
14605 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
14606 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
14607 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
14608 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
14609 fn rewrite_unnest_expansion(select: &crate::expressions::Select, target: DialectType) -> Option<crate::expressions::Select> {
14610 use crate::expressions::{
14611 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind,
14612 Literal, UnnestFunc,
14613 };
14614
14615 let index_offset: i64 = match target {
14616 DialectType::Presto | DialectType::Trino => 1,
14617 _ => 0, // BigQuery, Snowflake
14618 };
14619
14620 let if_func_name = match target {
14621 DialectType::Snowflake => "IFF",
14622 _ => "IF",
14623 };
14624
14625 let array_length_func = match target {
14626 DialectType::BigQuery => "ARRAY_LENGTH",
14627 DialectType::Presto | DialectType::Trino => "CARDINALITY",
14628 DialectType::Snowflake => "ARRAY_SIZE",
14629 _ => "ARRAY_LENGTH",
14630 };
14631
14632 let use_table_aliases = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake);
14633 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
14634
14635 fn make_col(name: &str, table: Option<&str>) -> Expression {
14636 if let Some(tbl) = table {
14637 Expression::Column(Column {
14638 name: Identifier::new(name.to_string()),
14639 table: Some(Identifier::new(tbl.to_string())),
14640 join_mark: false,
14641 trailing_comments: Vec::new(),
14642 })
14643 } else {
14644 Expression::Identifier(Identifier::new(name.to_string()))
14645 }
14646 }
14647
14648 fn make_join(this: Expression) -> Join {
14649 Join {
14650 this,
14651 on: None,
14652 using: Vec::new(),
14653 kind: JoinKind::Cross,
14654 use_inner_keyword: false,
14655 use_outer_keyword: false,
14656 deferred_condition: false,
14657 join_hint: None,
14658 match_condition: None,
14659 pivots: Vec::new(),
14660 }
14661 }
14662
14663 // Collect UNNEST info from SELECT expressions
14664 struct UnnestInfo {
14665 arr_expr: Expression,
14666 col_alias: String,
14667 pos_alias: String,
14668 source_alias: String,
14669 original_expr: Expression,
14670 has_outer_alias: Option<String>,
14671 }
14672
14673 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
14674 let mut col_counter = 0usize;
14675 let mut pos_counter = 1usize;
14676 let mut source_counter = 1usize;
14677
14678 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
14679 match expr {
14680 Expression::Unnest(u) => Some(u.this.clone()),
14681 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() => {
14682 Some(f.args[0].clone())
14683 }
14684 Expression::Alias(a) => extract_unnest_arg(&a.this),
14685 Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
14686 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
14687 }
14688 _ => None,
14689 }
14690 }
14691
14692 fn get_alias_name(expr: &Expression) -> Option<String> {
14693 if let Expression::Alias(a) = expr {
14694 Some(a.alias.name.clone())
14695 } else {
14696 None
14697 }
14698 }
14699
14700 for sel_expr in &select.expressions {
14701 if let Some(arr) = extract_unnest_arg(sel_expr) {
14702 col_counter += 1;
14703 pos_counter += 1;
14704 source_counter += 1;
14705
14706 let col_alias = if col_counter == 1 { "col".to_string() } else { format!("col_{}", col_counter) };
14707 let pos_alias = format!("pos_{}", pos_counter);
14708 let source_alias = format!("_u_{}", source_counter);
14709 let has_outer_alias = get_alias_name(sel_expr);
14710
14711 unnest_infos.push(UnnestInfo {
14712 arr_expr: arr,
14713 col_alias,
14714 pos_alias,
14715 source_alias,
14716 original_expr: sel_expr.clone(),
14717 has_outer_alias,
14718 });
14719 }
14720 }
14721
14722 if unnest_infos.is_empty() {
14723 return None;
14724 }
14725
14726 let series_alias = "pos".to_string();
14727 let series_source_alias = "_u".to_string();
14728 let tbl_ref = if use_table_aliases { Some(series_source_alias.as_str()) } else { None };
14729
14730 // Build new SELECT expressions
14731 let mut new_select_exprs = Vec::new();
14732 for info in &unnest_infos {
14733 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
14734 let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };
14735
14736 let pos_col = make_col(&series_alias, tbl_ref);
14737 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
14738 let col_ref = make_col(actual_col_name, src_ref);
14739
14740 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
14741 let mut if_args = vec![eq_cond, col_ref];
14742 if null_third_arg {
14743 if_args.push(Expression::Null(crate::expressions::Null));
14744 }
14745
14746 let if_expr = Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
14747 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
14748
14749 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
14750 final_expr,
14751 Identifier::new(actual_col_name.clone()),
14752 ))));
14753 }
14754
14755 // Build array size expressions for GREATEST
14756 let size_exprs: Vec<Expression> = unnest_infos.iter().map(|info| {
14757 Expression::Function(Box::new(Function::new(array_length_func.to_string(), vec![info.arr_expr.clone()])))
14758 }).collect();
14759
14760 let greatest = Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
14761
14762 let series_end = if index_offset == 0 {
14763 Expression::Sub(Box::new(BinaryOp::new(greatest, Expression::Literal(Literal::Number("1".to_string())))))
14764 } else {
14765 greatest
14766 };
14767
14768 // Build the position array source
14769 let series_unnest_expr = match target {
14770 DialectType::BigQuery => {
14771 let gen_array = Expression::Function(Box::new(Function::new(
14772 "GENERATE_ARRAY".to_string(),
14773 vec![Expression::Literal(Literal::Number("0".to_string())), series_end],
14774 )));
14775 Expression::Unnest(Box::new(UnnestFunc {
14776 this: gen_array, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
14777 }))
14778 }
14779 DialectType::Presto | DialectType::Trino => {
14780 let sequence = Expression::Function(Box::new(Function::new(
14781 "SEQUENCE".to_string(),
14782 vec![Expression::Literal(Literal::Number("1".to_string())), series_end],
14783 )));
14784 Expression::Unnest(Box::new(UnnestFunc {
14785 this: sequence, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
14786 }))
14787 }
14788 DialectType::Snowflake => {
14789 let range_end = Expression::Add(Box::new(BinaryOp::new(
14790 Expression::Paren(Box::new(crate::expressions::Paren { this: series_end, trailing_comments: Vec::new() })),
14791 Expression::Literal(Literal::Number("1".to_string())),
14792 )));
14793 let gen_range = Expression::Function(Box::new(Function::new(
14794 "ARRAY_GENERATE_RANGE".to_string(),
14795 vec![Expression::Literal(Literal::Number("0".to_string())), range_end],
14796 )));
14797 let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
14798 name: Identifier::new("INPUT".to_string()),
14799 value: gen_range,
14800 separator: crate::expressions::NamedArgSeparator::DArrow,
14801 }));
14802 let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
14803 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
14804 }
14805 _ => return None,
14806 };
14807
14808 // Build series alias expression
14809 let series_alias_expr = if use_table_aliases {
14810 let col_aliases = if matches!(target, DialectType::Snowflake) {
14811 vec![
14812 Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
14813 Identifier::new("path".to_string()), Identifier::new("index".to_string()),
14814 Identifier::new(series_alias.clone()), Identifier::new("this".to_string()),
14815 ]
14816 } else {
14817 vec![Identifier::new(series_alias.clone())]
14818 };
14819 Expression::Alias(Box::new(Alias {
14820 this: series_unnest_expr,
14821 alias: Identifier::new(series_source_alias.clone()),
14822 column_aliases: col_aliases,
14823 pre_alias_comments: Vec::new(),
14824 trailing_comments: Vec::new(),
14825 }))
14826 } else {
14827 Expression::Alias(Box::new(Alias::new(series_unnest_expr, Identifier::new(series_alias.clone()))))
14828 };
14829
14830 // Build CROSS JOINs for each UNNEST
14831 let mut joins = Vec::new();
14832 for info in &unnest_infos {
14833 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
14834
14835 let unnest_join_expr = match target {
14836 DialectType::BigQuery => {
14837 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
14838 let unnest = UnnestFunc {
14839 this: info.arr_expr.clone(),
14840 expressions: Vec::new(),
14841 with_ordinality: true,
14842 alias: Some(Identifier::new(actual_col_name.clone())),
14843 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
14844 };
14845 Expression::Unnest(Box::new(unnest))
14846 }
14847 DialectType::Presto | DialectType::Trino => {
14848 let unnest = UnnestFunc {
14849 this: info.arr_expr.clone(),
14850 expressions: Vec::new(),
14851 with_ordinality: true,
14852 alias: None,
14853 offset_alias: None,
14854 };
14855 Expression::Alias(Box::new(Alias {
14856 this: Expression::Unnest(Box::new(unnest)),
14857 alias: Identifier::new(info.source_alias.clone()),
14858 column_aliases: vec![
14859 Identifier::new(actual_col_name.clone()),
14860 Identifier::new(info.pos_alias.clone()),
14861 ],
14862 pre_alias_comments: Vec::new(),
14863 trailing_comments: Vec::new(),
14864 }))
14865 }
14866 DialectType::Snowflake => {
14867 let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
14868 name: Identifier::new("INPUT".to_string()),
14869 value: info.arr_expr.clone(),
14870 separator: crate::expressions::NamedArgSeparator::DArrow,
14871 }));
14872 let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
14873 let table_fn = Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
14874 Expression::Alias(Box::new(Alias {
14875 this: table_fn,
14876 alias: Identifier::new(info.source_alias.clone()),
14877 column_aliases: vec![
14878 Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
14879 Identifier::new("path".to_string()), Identifier::new(info.pos_alias.clone()),
14880 Identifier::new(actual_col_name.clone()), Identifier::new("this".to_string()),
14881 ],
14882 pre_alias_comments: Vec::new(),
14883 trailing_comments: Vec::new(),
14884 }))
14885 }
14886 _ => return None,
14887 };
14888
14889 joins.push(make_join(unnest_join_expr));
14890 }
14891
14892 // Build WHERE clause
14893 let mut where_conditions: Vec<Expression> = Vec::new();
14894 for info in &unnest_infos {
14895 let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };
14896 let pos_col = make_col(&series_alias, tbl_ref);
14897 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
14898
14899 let arr_size = Expression::Function(Box::new(Function::new(
14900 array_length_func.to_string(), vec![info.arr_expr.clone()],
14901 )));
14902
14903 let size_ref = if index_offset == 0 {
14904 Expression::Paren(Box::new(crate::expressions::Paren {
14905 this: Expression::Sub(Box::new(BinaryOp::new(arr_size, Expression::Literal(Literal::Number("1".to_string()))))),
14906 trailing_comments: Vec::new(),
14907 }))
14908 } else {
14909 arr_size
14910 };
14911
14912 let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
14913 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
14914 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
14915 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
14916 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren { this: and_cond, trailing_comments: Vec::new() }));
14917 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
14918
14919 where_conditions.push(or_cond);
14920 }
14921
14922 let where_expr = if where_conditions.len() == 1 {
14923 // Single condition: no parens needed
14924 where_conditions.into_iter().next().unwrap()
14925 } else {
14926 // Multiple conditions: wrap each OR in parens, then combine with AND
14927 let wrap = |e: Expression| Expression::Paren(Box::new(crate::expressions::Paren { this: e, trailing_comments: Vec::new() }));
14928 let mut iter = where_conditions.into_iter();
14929 let first = wrap(iter.next().unwrap());
14930 let second = wrap(iter.next().unwrap());
14931 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
14932 this: Expression::And(Box::new(BinaryOp::new(first, second))),
14933 trailing_comments: Vec::new(),
14934 }));
14935 for cond in iter {
14936 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
14937 }
14938 combined
14939 };
14940
14941 // Build the new SELECT
14942 let mut new_select = select.clone();
14943 new_select.expressions = new_select_exprs;
14944
14945 if new_select.from.is_some() {
14946 let mut all_joins = vec![make_join(series_alias_expr)];
14947 all_joins.extend(joins);
14948 new_select.joins.extend(all_joins);
14949 } else {
14950 new_select.from = Some(From { expressions: vec![series_alias_expr] });
14951 new_select.joins.extend(joins);
14952 }
14953
14954 if let Some(ref existing_where) = new_select.where_clause {
14955 let combined = Expression::And(Box::new(BinaryOp::new(existing_where.this.clone(), where_expr)));
14956 new_select.where_clause = Some(crate::expressions::Where { this: combined });
14957 } else {
14958 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
14959 }
14960
14961 Some(new_select)
14962 }
14963
14964 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
14965 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
14966 match original {
14967 Expression::Unnest(_) => replacement.clone(),
14968 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
14969 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
14970 Expression::Add(op) => {
14971 let left = Self::replace_unnest_with_if(&op.left, replacement);
14972 let right = Self::replace_unnest_with_if(&op.right, replacement);
14973 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
14974 }
14975 Expression::Sub(op) => {
14976 let left = Self::replace_unnest_with_if(&op.left, replacement);
14977 let right = Self::replace_unnest_with_if(&op.right, replacement);
14978 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
14979 }
14980 Expression::Mul(op) => {
14981 let left = Self::replace_unnest_with_if(&op.left, replacement);
14982 let right = Self::replace_unnest_with_if(&op.right, replacement);
14983 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
14984 }
14985 Expression::Div(op) => {
14986 let left = Self::replace_unnest_with_if(&op.left, replacement);
14987 let right = Self::replace_unnest_with_if(&op.right, replacement);
14988 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
14989 }
14990 _ => original.clone(),
14991 }
14992 }
14993
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Shapes handled:
    /// - `INSERT INTO #tmp ...` -> `INSERT INTO tmp ...` when the target is not
    ///   TSQL/Fabric (the dialects that understand `#temp` table names).
    /// - `SELECT ... INTO tbl` -> `CREATE [TEMPORARY] TABLE tbl AS SELECT ...`
    ///   for DuckDB/Snowflake targets; a leading `#` on the table name (or an
    ///   explicit TEMPORARY on the INTO clause) marks the table temporary.
    /// - For PostgreSQL/Redshift targets, `SELECT ... INTO #tmp` keeps the INTO
    ///   clause but rewrites it as `INTO TEMPORARY tmp`.
    /// - Every other statement/target combination is returned unchanged.
    fn transform_select_into(expr: Expression, _source: DialectType, target: DialectType) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#') && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
                let mut new_insert = insert.clone();
                new_insert.table.name.name = insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // INSERT statements never carry SELECT INTO; nothing further to do.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may be parsed as a table ref or a bare identifier.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // TSQL spells temp-ness with a leading '#'; other sources may have
                // set the explicit `temporary` flag on the INTO clause already.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // All optional CREATE TABLE features are defaulted off; only
                        // name / temporary / AS SELECT are populated.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false, is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when temp-ness came from the '#' prefix; an
                        // already-explicit TEMPORARY clause is left as-is.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
15082
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Two passes over the statement, in order:
    /// 1. Presto-style `WITH (key = value)` string pairs (`ct.with_properties`)
    ///    are drained and re-emitted per target: kept with normalized casing
    ///    (Presto family), converted to STORED AS / USING + TBLPROPERTIES AST
    ///    nodes (Hive / Spark family), dropped entirely (DuckDB), or restored
    ///    as-is (any other target).
    /// 2. Hive/Spark-style AST property nodes (`ct.properties`) are converted in
    ///    the opposite direction for Presto-family targets, stripped for DuckDB,
    ///    or — for the remaining targets — have quoted STORED AS format names
    ///    unquoted in place.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            // NOTE(review): a lone "'" satisfies both checks and the 1..len-1
            // slice below would panic; assumes upstream parsing never produces
            // a bare single quote here — confirm.
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len()-1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY(' y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Fast exit: statement carries no properties of either flavor.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // drain(..) empties with_properties so each target arm below can
            // rebuild it (or leave it empty) from the extracted pieces.
            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner.split(',').map(|c| c.trim().trim_matches('"').trim_matches('\'')).collect();
                            let array_val = format!("ARRAY[{}]", cols.iter().map(|c| format!("'{}'", c)).collect::<Vec<_>>().join(", "));
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format = Some(true) selects STORED AS rendering.
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral { value: true }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining keys become TBLPROPERTIES ('k' = value) pairs.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena);
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Drain and selectively rebuild ct.properties; converted entries
                // land in ct.with_properties instead.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format expression: keep the node untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties.push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => format!("'{}'", s),
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns in the schema).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!("ARRAY[{}]",
                                        col_names.iter().map(|n| format!("'{}'", n)).collect::<Vec<_>>().join(", "));
                                    ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // DuckDB drops every remaining property; Presto keeps them.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr = Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
15361
15362 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
15363 fn apply_partitioned_by(ct: &mut crate::expressions::CreateTable, partitioned_by_value: &str, target: DialectType) {
15364 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
15365
15366 // Parse the ARRAY['col1', 'col2'] value to extract column names
15367 let mut col_names: Vec<String> = Vec::new();
15368 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
15369 let inner = partitioned_by_value
15370 .trim()
15371 .trim_start_matches("ARRAY")
15372 .trim_start_matches('[')
15373 .trim_start_matches('(')
15374 .trim_end_matches(']')
15375 .trim_end_matches(')');
15376 for part in inner.split(',') {
15377 let col = part.trim().trim_matches('\'').trim_matches('"');
15378 if !col.is_empty() {
15379 col_names.push(col.to_string());
15380 }
15381 }
15382
15383 if col_names.is_empty() {
15384 return;
15385 }
15386
15387 if matches!(target, DialectType::Hive) {
15388 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
15389 let mut partition_col_defs = Vec::new();
15390 for col_name in &col_names {
15391 // Find and remove from columns
15392 if let Some(pos) = ct.columns.iter().position(|c| c.name.name.eq_ignore_ascii_case(col_name)) {
15393 let col_def = ct.columns.remove(pos);
15394 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
15395 }
15396 }
15397 if !partition_col_defs.is_empty() {
15398 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15399 PartitionedByProperty {
15400 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_col_defs }))),
15401 },
15402 )));
15403 }
15404 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
15405 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
15406 // Use quoted identifiers to match the quoting style of the original column definitions
15407 let partition_exprs: Vec<Expression> = col_names
15408 .iter()
15409 .map(|name| {
15410 // Check if the column exists in the column list and use its quoting
15411 let is_quoted = ct.columns.iter().any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
15412 let ident = if is_quoted { Identifier::quoted(name.clone()) } else { Identifier::new(name.clone()) };
15413 Expression::Column(Column {
15414 name: ident,
15415 table: None,
15416 join_mark: false,
15417 trailing_comments: Vec::new(),
15418 })
15419 })
15420 .collect();
15421 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15422 PartitionedByProperty {
15423 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_exprs }))),
15424 },
15425 )));
15426 }
15427 // DuckDB: strip partitioned_by entirely (already handled)
15428 }
15429
15430 /// Convert a DataType to Spark's type string format (using angle brackets)
15431 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
15432 use crate::expressions::DataType;
15433 match dt {
15434 DataType::Int { .. } => "INT".to_string(),
15435 DataType::BigInt { .. } => "BIGINT".to_string(),
15436 DataType::SmallInt { .. } => "SMALLINT".to_string(),
15437 DataType::TinyInt { .. } => "TINYINT".to_string(),
15438 DataType::Float { .. } => "FLOAT".to_string(),
15439 DataType::Double { .. } => "DOUBLE".to_string(),
15440 DataType::Decimal { precision: Some(p), scale: Some(s) } => format!("DECIMAL({}, {})", p, s),
15441 DataType::Decimal { precision: Some(p), .. } => format!("DECIMAL({})", p),
15442 DataType::Decimal { .. } => "DECIMAL".to_string(),
15443 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "STRING".to_string(),
15444 DataType::Char { .. } => "STRING".to_string(),
15445 DataType::Boolean => "BOOLEAN".to_string(),
15446 DataType::Date => "DATE".to_string(),
15447 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
15448 DataType::Json | DataType::JsonB => "STRING".to_string(),
15449 DataType::Binary { .. } => "BINARY".to_string(),
15450 DataType::Array { element_type, .. } => format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type)),
15451 DataType::Map { key_type, value_type } => format!("MAP<{}, {}>", Self::data_type_to_spark_string(key_type), Self::data_type_to_spark_string(value_type)),
15452 DataType::Struct { fields, .. } => {
15453 let field_strs: Vec<String> = fields.iter().map(|f| {
15454 if f.name.is_empty() {
15455 Self::data_type_to_spark_string(&f.data_type)
15456 } else {
15457 format!("{}: {}", f.name, Self::data_type_to_spark_string(&f.data_type))
15458 }
15459 }).collect();
15460 format!("STRUCT<{}>", field_strs.join(", "))
15461 }
15462 DataType::Custom { name } => name.clone(),
15463 _ => format!("{:?}", dt),
15464 }
15465 }
15466
15467 /// Extract value and unit from an Interval expression
15468 /// Returns (value_expression, IntervalUnit)
15469 fn extract_interval_parts(interval_expr: &Expression) -> (Expression, crate::expressions::IntervalUnit) {
15470 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
15471
15472 if let Expression::Interval(iv) = interval_expr {
15473 let val = iv.this.clone().unwrap_or(Expression::number(0));
15474 let unit = match &iv.unit {
15475 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
15476 None => {
15477 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
15478 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
15479 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
15480 if parts.len() == 2 {
15481 let unit_str = parts[1].trim().to_uppercase();
15482 let parsed_unit = match unit_str.as_str() {
15483 "YEAR" | "YEARS" => IntervalUnit::Year,
15484 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
15485 "MONTH" | "MONTHS" => IntervalUnit::Month,
15486 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
15487 "DAY" | "DAYS" => IntervalUnit::Day,
15488 "HOUR" | "HOURS" => IntervalUnit::Hour,
15489 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
15490 "SECOND" | "SECONDS" => IntervalUnit::Second,
15491 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
15492 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
15493 _ => IntervalUnit::Day,
15494 };
15495 // Return just the numeric part as value and parsed unit
15496 return (Expression::Literal(crate::expressions::Literal::String(parts[0].to_string())), parsed_unit);
15497 }
15498 IntervalUnit::Day
15499 } else {
15500 IntervalUnit::Day
15501 }
15502 }
15503 _ => IntervalUnit::Day,
15504 };
15505 (val, unit)
15506 } else {
15507 // Not an interval - pass through
15508 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
15509 }
15510 }
15511
15512 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
15513 fn normalize_bigquery_function(e: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
15514 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
15515
15516 let f = if let Expression::Function(f) = e { *f } else { return Ok(e); };
15517 let name = f.name.to_uppercase();
15518 let mut args = f.args;
15519
15520 /// Helper to extract unit string from an identifier, column, or literal expression
15521 fn get_unit_str(expr: &Expression) -> String {
15522 match expr {
15523 Expression::Identifier(id) => id.name.to_uppercase(),
15524 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
15525 Expression::Column(col) => col.name.name.to_uppercase(),
15526 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
15527 Expression::Function(f) => {
15528 let base = f.name.to_uppercase();
15529 if !f.args.is_empty() {
15530 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
15531 let inner = get_unit_str(&f.args[0]);
15532 format!("{}({})", base, inner)
15533 } else {
15534 base
15535 }
15536 }
15537 _ => "DAY".to_string(),
15538 }
15539 }
15540
15541 /// Parse unit string to IntervalUnit
15542 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
15543 match s {
15544 "YEAR" => crate::expressions::IntervalUnit::Year,
15545 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
15546 "MONTH" => crate::expressions::IntervalUnit::Month,
15547 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
15548 "DAY" => crate::expressions::IntervalUnit::Day,
15549 "HOUR" => crate::expressions::IntervalUnit::Hour,
15550 "MINUTE" => crate::expressions::IntervalUnit::Minute,
15551 "SECOND" => crate::expressions::IntervalUnit::Second,
15552 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
15553 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
15554 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
15555 _ => crate::expressions::IntervalUnit::Day,
15556 }
15557 }
15558
15559 match name.as_str() {
15560 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
15561 // (BigQuery: result = date1 - date2, Standard: result = end - start)
15562 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
15563 let date1 = args.remove(0);
15564 let date2 = args.remove(0);
15565 let unit_expr = args.remove(0);
15566 let unit_str = get_unit_str(&unit_expr);
15567
15568 if matches!(target, DialectType::BigQuery) {
15569 // BigQuery -> BigQuery: just uppercase the unit
15570 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
15571 return Ok(Expression::Function(Box::new(Function::new(
15572 f.name, vec![date1, date2, unit],
15573 ))));
15574 }
15575
15576 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
15577 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
15578 if matches!(target, DialectType::Snowflake) {
15579 return Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
15580 this: Box::new(date2),
15581 expression: Box::new(date1),
15582 unit: Some(unit_str),
15583 })));
15584 }
15585
15586 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
15587 if matches!(target, DialectType::DuckDB) {
15588 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
15589 // CAST to TIME
15590 let cast_fn = |e: Expression| -> Expression {
15591 match e {
15592 Expression::Literal(Literal::String(s)) => {
15593 Expression::Cast(Box::new(Cast {
15594 this: Expression::Literal(Literal::String(s)),
15595 to: DataType::Custom { name: "TIME".to_string() },
15596 trailing_comments: vec![],
15597 double_colon_syntax: false,
15598 format: None,
15599 default: None,
15600 }))
15601 }
15602 other => other,
15603 }
15604 };
15605 (cast_fn(date1), cast_fn(date2))
15606 } else if name == "DATETIME_DIFF" {
15607 // CAST to TIMESTAMP
15608 (Self::ensure_cast_timestamp(date1), Self::ensure_cast_timestamp(date2))
15609 } else {
15610 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
15611 (Self::ensure_cast_timestamptz(date1), Self::ensure_cast_timestamptz(date2))
15612 };
15613 return Ok(Expression::Function(Box::new(Function::new(
15614 "DATE_DIFF".to_string(), vec![
15615 Expression::Literal(Literal::String(unit_str)),
15616 cast_d2,
15617 cast_d1,
15618 ],
15619 ))));
15620 }
15621
15622 // Convert to standard TIMESTAMPDIFF(unit, start, end)
15623 let unit = Expression::Identifier(Identifier::new(unit_str));
15624 Ok(Expression::Function(Box::new(Function::new(
15625 "TIMESTAMPDIFF".to_string(), vec![unit, date2, date1],
15626 ))))
15627 }
15628
15629 // DATEDIFF(unit, start, end) -> target-specific form
15630 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
15631 "DATEDIFF" if args.len() == 3 => {
15632 let arg0 = args.remove(0);
15633 let arg1 = args.remove(0);
15634 let arg2 = args.remove(0);
15635 let unit_str = get_unit_str(&arg0);
15636
15637 // Redshift DATEDIFF(unit, start, end) order: result = end - start
15638 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
15639 // TSQL DATEDIFF(unit, start, end) order: result = end - start
15640
15641 if matches!(target, DialectType::Snowflake) {
15642 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
15643 let unit = Expression::Identifier(Identifier::new(unit_str));
15644 return Ok(Expression::Function(Box::new(Function::new(
15645 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15646 ))));
15647 }
15648
15649 if matches!(target, DialectType::DuckDB) {
15650 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
15651 let cast_d1 = Self::ensure_cast_timestamp(arg1);
15652 let cast_d2 = Self::ensure_cast_timestamp(arg2);
15653 return Ok(Expression::Function(Box::new(Function::new(
15654 "DATE_DIFF".to_string(), vec![
15655 Expression::Literal(Literal::String(unit_str)),
15656 cast_d1,
15657 cast_d2,
15658 ],
15659 ))));
15660 }
15661
15662 if matches!(target, DialectType::BigQuery) {
15663 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
15664 let cast_d1 = Self::ensure_cast_datetime(arg1);
15665 let cast_d2 = Self::ensure_cast_datetime(arg2);
15666 let unit = Expression::Identifier(Identifier::new(unit_str));
15667 return Ok(Expression::Function(Box::new(Function::new(
15668 "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
15669 ))));
15670 }
15671
15672 if matches!(target, DialectType::Spark | DialectType::Databricks) {
15673 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
15674 let unit = Expression::Identifier(Identifier::new(unit_str));
15675 return Ok(Expression::Function(Box::new(Function::new(
15676 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15677 ))));
15678 }
15679
15680 if matches!(target, DialectType::Hive) {
15681 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
15682 match unit_str.as_str() {
15683 "MONTH" => {
15684 return Ok(Expression::Function(Box::new(Function::new(
15685 "CAST".to_string(), vec![
15686 Expression::Function(Box::new(Function::new(
15687 "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
15688 ))),
15689 ],
15690 ))));
15691 }
15692 "WEEK" => {
15693 return Ok(Expression::Cast(Box::new(Cast {
15694 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
15695 Expression::Function(Box::new(Function::new(
15696 "DATEDIFF".to_string(), vec![arg2, arg1],
15697 ))),
15698 Expression::Literal(Literal::Number("7".to_string())),
15699 ))),
15700 to: DataType::Int { length: None, integer_spelling: false },
15701 trailing_comments: vec![],
15702 double_colon_syntax: false,
15703 format: None,
15704 default: None,
15705 })));
15706 }
15707 _ => {
15708 // Default: DATEDIFF(end, start) for DAY
15709 return Ok(Expression::Function(Box::new(Function::new(
15710 "DATEDIFF".to_string(), vec![arg2, arg1],
15711 ))));
15712 }
15713 }
15714 }
15715
15716 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
15717 // Presto/Trino: DATE_DIFF('UNIT', start, end)
15718 return Ok(Expression::Function(Box::new(Function::new(
15719 "DATE_DIFF".to_string(), vec![
15720 Expression::Literal(Literal::String(unit_str)),
15721 arg1,
15722 arg2,
15723 ],
15724 ))));
15725 }
15726
15727 if matches!(target, DialectType::TSQL) {
15728 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
15729 let cast_d2 = Self::ensure_cast_datetime2(arg2);
15730 let unit = Expression::Identifier(Identifier::new(unit_str));
15731 return Ok(Expression::Function(Box::new(Function::new(
15732 "DATEDIFF".to_string(), vec![unit, arg1, cast_d2],
15733 ))));
15734 }
15735
15736 if matches!(target, DialectType::PostgreSQL) {
15737 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
15738 // For now, use DATEDIFF (passthrough) with uppercased unit
15739 let unit = Expression::Identifier(Identifier::new(unit_str));
15740 return Ok(Expression::Function(Box::new(Function::new(
15741 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15742 ))));
15743 }
15744
15745 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
15746 let unit = Expression::Identifier(Identifier::new(unit_str));
15747 Ok(Expression::Function(Box::new(Function::new(
15748 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15749 ))))
15750 }
15751
// DATE_DIFF(date1, date2, unit) -> standard form
// BigQuery-style argument order: (later_date, earlier_date, unit).
"DATE_DIFF" if args.len() == 3 => {
    let date1 = args.remove(0);
    let date2 = args.remove(0);
    let unit_expr = args.remove(0);
    let unit_str = get_unit_str(&unit_expr);

    if matches!(target, DialectType::BigQuery) {
        // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
        let norm_unit = if unit_str == "WEEK(SUNDAY)" { "WEEK".to_string() } else { unit_str };
        // Turn bare DATE '...' literals into explicit casts for stable output.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        let unit = Expression::Identifier(Identifier::new(norm_unit));
        // Keep the original (possibly differently-cased) function name.
        return Ok(Expression::Function(Box::new(Function::new(
            f.name, vec![norm_d1, norm_d2, unit],
        ))));
    }

    if matches!(target, DialectType::MySQL) {
        // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
        // NOTE(review): the unit is dropped here — verify non-DAY units are
        // handled upstream or intentionally degraded for MySQL.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(), vec![norm_d1, norm_d2],
        ))));
    }

    if matches!(target, DialectType::StarRocks) {
        // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(), vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d1,
                norm_d2,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
        let norm_d1 = Self::ensure_cast_date(date1);
        let norm_d2 = Self::ensure_cast_date(date2);

        // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
        let is_week_variant = unit_str == "WEEK" || unit_str.starts_with("WEEK(") || unit_str == "ISOWEEK";
        if is_week_variant {
            // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
            // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
            // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
            // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
            // The day offset shifts each date so DuckDB's Monday-anchored
            // DATE_TRUNC('WEEK') lines up with the requested week start day.
            let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
            } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                Some("1") // Shift Sunday to Monday alignment
            } else if unit_str == "WEEK(SATURDAY)" {
                Some("-5")
            } else if unit_str == "WEEK(TUESDAY)" {
                Some("-1")
            } else if unit_str == "WEEK(WEDNESDAY)" {
                Some("-2")
            } else if unit_str == "WEEK(THURSDAY)" {
                Some("-3")
            } else if unit_str == "WEEK(FRIDAY)" {
                Some("-4")
            } else {
                Some("1") // default to Sunday
            };

            // Builds DATE_TRUNC('WEEK', date [+ INTERVAL 'off' DAY]).
            let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                let shifted = if let Some(off) = offset {
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String(off.to_string()))),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                    }));
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval)))
                } else {
                    date
                };
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String("WEEK".to_string())), shifted],
                )))
            };

            // Operands are swapped (d2 before d1) to flip from BigQuery's
            // (later, earlier) convention to DuckDB's (earlier, later).
            let trunc_d2 = make_trunc(norm_d2, day_offset);
            let trunc_d1 = make_trunc(norm_d1, day_offset);
            return Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::Literal(Literal::String("WEEK".to_string())),
                    trunc_d2,
                    trunc_d1,
                ],
            ))));
        }

        // Non-week units: straight DATE_DIFF('UNIT', d2, d1) with swapped operands.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(), vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d2,
                norm_d1,
            ],
        ))));
    }

    // Default: DATEDIFF(unit, date2, date1)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEDIFF".to_string(), vec![unit, date2, date1],
    ))))
}
15863
// TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
// Also covers DATETIME_ADD and TIME_ADD; `name` distinguishes them below.
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose INTERVAL n UNIT into its numeric value and unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                this: Box::new(val),
                expression: Box::new(cast_ts),
                unit: Some(unit_str),
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                }));
                Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(ts, interval))))
            } else if name == "DATETIME_ADD" && matches!(target, DialectType::Databricks) {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                // Covers TIMESTAMP_ADD/TIME_ADD for both Spark and Databricks.
                let unit_str = Self::interval_unit_to_string(&unit);
                let cast_ts = if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, cast_ts],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: mysql_ts,
                interval: val,
                unit,
            })))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: cast_ts,
                interval: val,
                unit,
            })))
        }
    }
}
15960
// TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
// Mirror of the *_ADD arm above; subtraction is expressed either as a
// negated TIMESTAMPADD or as a DateSub node, per target.
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Negate by multiplying with -1 so arbitrary (non-literal) values work.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
            )));
            Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                this: Box::new(neg_val),
                expression: Box::new(cast_ts),
                unit: Some(unit_str),
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                }));
                Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_ts, interval))))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                // NOTE(review): Spark + TIME_SUB also lands here (not in the
                // branch above) — confirm that is intended.
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                    val,
                    Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), neg_val, ts],
                ))))
            }
        }
        DialectType::MySQL => {
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper.
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: mysql_ts,
                interval: val,
                unit,
            })))
        }
        _ => {
            // DuckDB gets explicit casts; everyone else passes ts through.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: cast_ts,
                interval: val,
                unit,
            })))
        }
    }
}
16055
16056 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
16057 "DATE_SUB" if args.len() == 2 => {
16058 let date = args.remove(0);
16059 let interval_expr = args.remove(0);
16060 let (val, unit) = Self::extract_interval_parts(&interval_expr);
16061
16062 match target {
16063 DialectType::Databricks | DialectType::Spark => {
16064 // Databricks/Spark: DATE_ADD(date, -val)
16065 // Use DateAdd expression with negative val so it generates correctly
16066 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
16067 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
16068 // Instead, we directly output as a simple negated DateSub
16069 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16070 this: date,
16071 interval: val,
16072 unit,
16073 })))
16074 }
16075 DialectType::DuckDB => {
16076 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
16077 let cast_date = Self::ensure_cast_date(date);
16078 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16079 this: Some(val),
16080 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
16081 }));
16082 Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
16083 }
16084 DialectType::Snowflake => {
16085 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
16086 // Just ensure the date is cast properly
16087 let cast_date = Self::ensure_cast_date(date);
16088 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16089 this: cast_date,
16090 interval: val,
16091 unit,
16092 })))
16093 }
16094 DialectType::PostgreSQL => {
16095 // PostgreSQL: date - INTERVAL 'val UNIT'
16096 let unit_str = Self::interval_unit_to_string(&unit);
16097 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16098 this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
16099 unit: None,
16100 }));
16101 Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(date, interval))))
16102 }
16103 _ => {
16104 Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
16105 this: date,
16106 interval: val,
16107 unit,
16108 })))
16109 }
16110 }
16111 }
16112
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
"DATEADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Uppercased textual unit extracted from the first argument.
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
        // Multiplies an interval amount by a constant factor, folding the
        // multiplication at rewrite time when the amount is a numeric literal.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                if let Ok(val) = n.parse::<i64>() {
                    return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
                }
            }
            // Non-literal amount: emit an explicit `expr * factor`.
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
            )))
        }
        match unit_str.as_str() {
            "YEAR" => {
                // 1 year = 12 months.
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                // 1 quarter = 3 months.
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                // 1 week = 7 days; Spark's 2-arg DATE_ADD adds days.
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, arg1],
                ))));
            }
            _ => {
                // Other units: date + INTERVAL val UNIT arithmetic.
                let iu = parse_interval_unit(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT'
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&arg1), unit_str)))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
    }

    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
        // Presto/Trino: DATE_ADD('UNIT', val, date)
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![
                Expression::Literal(Literal::String(unit_str)),
                arg1,
                arg2,
            ],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date)
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(), vec![unit, arg1, arg2],
    ))))
}
16275
16276 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
16277 "DATE_ADD" if args.len() == 3 => {
16278 let arg0 = args.remove(0);
16279 let arg1 = args.remove(0);
16280 let arg2 = args.remove(0);
16281 let unit_str = get_unit_str(&arg0);
16282
16283 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
16284 // Presto/Trino: DATE_ADD('UNIT', val, date)
16285 return Ok(Expression::Function(Box::new(Function::new(
16286 "DATE_ADD".to_string(), vec![
16287 Expression::Literal(Literal::String(unit_str)),
16288 arg1,
16289 arg2,
16290 ],
16291 ))));
16292 }
16293
16294 if matches!(target, DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift) {
16295 // DATEADD(UNIT, val, date)
16296 let unit = Expression::Identifier(Identifier::new(unit_str));
16297 let date = if matches!(target, DialectType::TSQL) {
16298 Self::ensure_cast_datetime2(arg2)
16299 } else {
16300 arg2
16301 };
16302 return Ok(Expression::Function(Box::new(Function::new(
16303 "DATEADD".to_string(), vec![unit, arg1, date],
16304 ))));
16305 }
16306
16307 if matches!(target, DialectType::DuckDB) {
16308 // DuckDB: date + INTERVAL val UNIT
16309 let iu = parse_interval_unit(&unit_str);
16310 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16311 this: Some(arg1),
16312 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
16313 }));
16314 return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
16315 }
16316
16317 if matches!(target, DialectType::Spark | DialectType::Databricks) {
16318 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
16319 let unit = Expression::Identifier(Identifier::new(unit_str));
16320 return Ok(Expression::Function(Box::new(Function::new(
16321 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
16322 ))));
16323 }
16324
16325 // Default: DATE_ADD(UNIT, val, date)
16326 let unit = Expression::Identifier(Identifier::new(unit_str));
16327 Ok(Expression::Function(Box::new(Function::new(
16328 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
16329 ))))
16330 }
16331
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose INTERVAL val UNIT into value and unit; also keep a textual unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            // Quote the interval value (DuckDB prefers INTERVAL '1' DAY).
            let quoted_val = Self::quote_interval_val(&val);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT'
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
                unit: None,
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            // NOTE(review): the value is stringified then cast back to BIGINT —
            // presumably to normalize literal formatting; confirm for
            // non-literal amounts.
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY
            match unit_str.as_str() {
                "DAY" => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![date, val],
                    ))))
                }
                "MONTH" => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(), vec![date, val],
                    ))))
                }
                _ => {
                    // Other units: keep the 2-arg DATE_ADD(date, INTERVAL ...) shape.
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(UNIT, val, date) with no extra cast.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, date,
                ],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, interval],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(UNIT, val, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, date,
                ],
            ))))
        }
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            })))
        }
    }
}
16472
16473 // ADD_MONTHS(date, val) -> target-specific form
16474 "ADD_MONTHS" if args.len() == 2 => {
16475 let date = args.remove(0);
16476 let val = args.remove(0);
16477
16478 if matches!(target, DialectType::TSQL) {
16479 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
16480 let cast_date = Self::ensure_cast_datetime2(date);
16481 return Ok(Expression::Function(Box::new(Function::new(
16482 "DATEADD".to_string(), vec![
16483 Expression::Identifier(Identifier::new("MONTH")),
16484 val,
16485 cast_date,
16486 ],
16487 ))));
16488 }
16489
16490 if matches!(target, DialectType::DuckDB) {
16491 // DuckDB: date + INTERVAL val MONTH
16492 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
16493 this: Some(val),
16494 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
16495 unit: crate::expressions::IntervalUnit::Month,
16496 use_plural: false,
16497 }),
16498 }));
16499 return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))));
16500 }
16501
16502 if matches!(target, DialectType::Snowflake) {
16503 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
16504 if matches!(source, DialectType::Snowflake) {
16505 return Ok(Expression::Function(Box::new(Function::new(
16506 "ADD_MONTHS".to_string(), vec![date, val],
16507 ))));
16508 }
16509 return Ok(Expression::Function(Box::new(Function::new(
16510 "DATEADD".to_string(), vec![
16511 Expression::Identifier(Identifier::new("MONTH")),
16512 val,
16513 date,
16514 ],
16515 ))));
16516 }
16517
16518 if matches!(target, DialectType::Spark | DialectType::Databricks) {
16519 // Spark: ADD_MONTHS(date, val) - keep as is
16520 return Ok(Expression::Function(Box::new(Function::new(
16521 "ADD_MONTHS".to_string(), vec![date, val],
16522 ))));
16523 }
16524
16525 if matches!(target, DialectType::Hive) {
16526 return Ok(Expression::Function(Box::new(Function::new(
16527 "ADD_MONTHS".to_string(), vec![date, val],
16528 ))));
16529 }
16530
16531 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
16532 // Presto: DATE_ADD('MONTH', val, date)
16533 return Ok(Expression::Function(Box::new(Function::new(
16534 "DATE_ADD".to_string(), vec![
16535 Expression::Literal(Literal::String("MONTH".to_string())),
16536 val,
16537 date,
16538 ],
16539 ))));
16540 }
16541
16542 // Default: keep ADD_MONTHS
16543 Ok(Expression::Function(Box::new(Function::new(
16544 "ADD_MONTHS".to_string(), vec![date, val],
16545 ))))
16546 }
16547
// SAFE_DIVIDE(x, y) -> target-specific form directly
// BigQuery semantics: returns NULL instead of erroring when y = 0.
// (The target dispatch continues past this view.)
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions
    // so the generated `x / y` and `y <> 0` keep their precedence.
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
        _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
        _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
    };
    // Shared building blocks for the per-target forms below: `y <> 0` and `x / y`.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(y_ref.clone(), Expression::number(0))));
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(x_ref.clone(), y_ref.clone())));
16563
16564 match target {
16565 DialectType::DuckDB | DialectType::PostgreSQL => {
16566 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
16567 let result_div = if matches!(target, DialectType::PostgreSQL) {
16568 let cast_x = Expression::Cast(Box::new(Cast {
16569 this: x_ref,
16570 to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
16571 trailing_comments: vec![],
16572 double_colon_syntax: false,
16573 format: None,
16574 default: None,
16575 }));
16576 Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)))
16577 } else {
16578 div_expr
16579 };
16580 Ok(Expression::Case(Box::new(crate::expressions::Case {
16581 operand: None,
16582 whens: vec![(condition, result_div)],
16583 else_: Some(Expression::Null(crate::expressions::Null)),
16584 })))
16585 }
16586 DialectType::Snowflake => {
16587 // IFF(y <> 0, x / y, NULL)
16588 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
16589 condition,
16590 true_value: div_expr,
16591 false_value: Some(Expression::Null(crate::expressions::Null)),
16592 original_name: Some("IFF".to_string()),
16593 })))
16594 }
16595 DialectType::Presto | DialectType::Trino => {
16596 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
16597 let cast_x = Expression::Cast(Box::new(Cast {
16598 this: x_ref,
16599 to: DataType::Double { precision: None, scale: None },
16600 trailing_comments: vec![],
16601 double_colon_syntax: false,
16602 format: None,
16603 default: None,
16604 }));
16605 let cast_div = Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)));
16606 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
16607 condition,
16608 true_value: cast_div,
16609 false_value: Some(Expression::Null(crate::expressions::Null)),
16610 original_name: None,
16611 })))
16612 }
16613 _ => {
16614 // IF(y <> 0, x / y, NULL)
16615 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
16616 condition,
16617 true_value: div_expr,
16618 false_value: Some(Expression::Null(crate::expressions::Null)),
16619 original_name: None,
16620 })))
16621 }
16622 }
16623 }
16624
16625 // GENERATE_UUID() -> UUID() with CAST to string
16626 "GENERATE_UUID" => {
16627 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
16628 this: None,
16629 name: None,
16630 is_string: None,
16631 }));
16632 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
16633 let cast_type = match target {
16634 DialectType::DuckDB => Some(DataType::Text),
16635 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar { length: None, parenthesized_length: false }),
16636 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Some(DataType::String { length: None }),
16637 _ => None,
16638 };
16639 if let Some(dt) = cast_type {
16640 Ok(Expression::Cast(Box::new(Cast {
16641 this: uuid_expr,
16642 to: dt,
16643 trailing_comments: vec![],
16644 double_colon_syntax: false,
16645 format: None,
16646 default: None,
16647 })))
16648 } else {
16649 Ok(uuid_expr)
16650 }
16651 }
16652
16653 // COUNTIF(x) -> CountIf expression
16654 "COUNTIF" if args.len() == 1 => {
16655 let arg = args.remove(0);
16656 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
16657 this: arg,
16658 distinct: false,
16659 filter: None,
16660 order_by: vec![],
16661 name: None,
16662 ignore_nulls: None,
16663 having_max: None,
16664 limit: None,
16665 })))
16666 }
16667
16668 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
16669 "EDIT_DISTANCE" => {
16670 // Strip named arguments (max_distance => N) and pass as positional
16671 let mut positional_args: Vec<Expression> = vec![];
16672 for arg in args {
16673 match arg {
16674 Expression::NamedArgument(na) => {
16675 positional_args.push(na.value);
16676 }
16677 other => positional_args.push(other),
16678 }
16679 }
16680 if positional_args.len() >= 2 {
16681 let col1 = positional_args.remove(0);
16682 let col2 = positional_args.remove(0);
16683 let levenshtein = crate::expressions::BinaryFunc {
16684 this: col1,
16685 expression: col2,
16686 original_name: None,
16687 };
16688 // Pass extra args through a function wrapper with all args
16689 if !positional_args.is_empty() {
16690 let mut all_args = vec![levenshtein.this, levenshtein.expression];
16691 all_args.extend(positional_args);
16692 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
16693 let func_name = if matches!(target, DialectType::PostgreSQL) {
16694 "LEVENSHTEIN_LESS_EQUAL"
16695 } else {
16696 "LEVENSHTEIN"
16697 };
16698 return Ok(Expression::Function(Box::new(Function::new(
16699 func_name.to_string(), all_args,
16700 ))));
16701 }
16702 Ok(Expression::Levenshtein(Box::new(levenshtein)))
16703 } else {
16704 Ok(Expression::Function(Box::new(Function::new("EDIT_DISTANCE".to_string(), positional_args))))
16705 }
16706 }
16707
16708 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
16709 "TIMESTAMP_SECONDS" if args.len() == 1 => {
16710 let arg = args.remove(0);
16711 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16712 this: Box::new(arg),
16713 scale: Some(0),
16714 zone: None,
16715 hours: None,
16716 minutes: None,
16717 format: None,
16718 target_type: None,
16719 })))
16720 }
16721
16722 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
16723 "TIMESTAMP_MILLIS" if args.len() == 1 => {
16724 let arg = args.remove(0);
16725 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16726 this: Box::new(arg),
16727 scale: Some(3),
16728 zone: None,
16729 hours: None,
16730 minutes: None,
16731 format: None,
16732 target_type: None,
16733 })))
16734 }
16735
16736 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
16737 "TIMESTAMP_MICROS" if args.len() == 1 => {
16738 let arg = args.remove(0);
16739 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16740 this: Box::new(arg),
16741 scale: Some(6),
16742 zone: None,
16743 hours: None,
16744 minutes: None,
16745 format: None,
16746 target_type: None,
16747 })))
16748 }
16749
16750 // DIV(x, y) -> IntDiv expression
16751 "DIV" if args.len() == 2 => {
16752 let x = args.remove(0);
16753 let y = args.remove(0);
16754 Ok(Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
16755 this: x,
16756 expression: y,
16757 original_name: None,
16758 })))
16759 }
16760
// TO_HEX(x) -> target-specific hex-encoding form.
// When the inner expression is itself a hash call (MD5/SHA*), several targets
// either already return hex (DuckDB) or need the *_BINARY variant (Snowflake).
"TO_HEX" if args.len() == 1 => {
    let arg = args.remove(0);
    // Check if inner function already returns hex string in certain targets.
    // NOTE(review): this matches `f.name` case-sensitively (uppercase only),
    // while the Snowflake branch below uppercases before matching — a
    // lowercase `md5(x)` would skip these special cases. TODO: confirm whether
    // function names are normalized to uppercase before reaching this point.
    let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as TO_HEX
        Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg]))))
    } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
        // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
        Ok(arg)
    } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
        // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
        // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
        // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
        // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
        if let Expression::Function(ref inner_f) = arg {
            let inner_args = inner_f.args.clone();
            let binary_func = match inner_f.name.to_uppercase().as_str() {
                "SHA1" => Expression::Function(Box::new(Function::new("SHA1_BINARY".to_string(), inner_args))),
                "MD5" => Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), inner_args))),
                "SHA256" => {
                    // SHA2_BINARY takes the digest width as a trailing argument.
                    let mut a = inner_args;
                    a.push(Expression::number(256));
                    Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
                }
                "SHA512" => {
                    let mut a = inner_args;
                    a.push(Expression::number(512));
                    Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
                }
                _ => arg.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![binary_func]))))
        } else {
            // Unreachable in practice (inner_returns_hex implies Function),
            // but kept as a safe fallback: LOWER(HEX(x)).
            let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
        }
    } else if matches!(target, DialectType::Presto | DialectType::Trino) {
        // Presto/Trino TO_HEX output is lowercased to match BigQuery's casing.
        let inner = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg])));
        Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
    } else {
        // Default: LOWER(HEX(x)).
        let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
        Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
    }
}
16807
// LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is dropped.
// NOTE(review): the original comment mentioned a PostgreSQL transform, but no
// PostgreSQL-specific handling exists here. The unit is stripped
// unconditionally, which is only correct when it is MONTH (the default) —
// a WEEK/QUARTER/YEAR unit would be silently lost. TODO: confirm whether
// non-MONTH units can reach this path.
"LAST_DAY" if args.len() == 2 => {
    let date = args.remove(0);
    let _unit = args.remove(0); // Strip the unit (MONTH is default)
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(), vec![date],
    ))))
}
16816
16817 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
16818 "GENERATE_ARRAY" => {
16819 let start = args.get(0).cloned();
16820 let end = args.get(1).cloned();
16821 let step = args.get(2).cloned();
16822 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16823 start: start.map(Box::new),
16824 end: end.map(Box::new),
16825 step: step.map(Box::new),
16826 is_end_exclusive: None,
16827 })))
16828 }
16829
16830 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
16831 "GENERATE_TIMESTAMP_ARRAY" => {
16832 let start = args.get(0).cloned();
16833 let end = args.get(1).cloned();
16834 let step = args.get(2).cloned();
16835
16836 if matches!(target, DialectType::DuckDB) {
16837 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
16838 // Only cast string literals - leave columns/expressions as-is
16839 let maybe_cast_ts = |expr: Expression| -> Expression {
16840 if matches!(&expr, Expression::Literal(Literal::String(_))) {
16841 Expression::Cast(Box::new(Cast {
16842 this: expr,
16843 to: DataType::Timestamp { precision: None, timezone: false },
16844 trailing_comments: vec![],
16845 double_colon_syntax: false,
16846 format: None,
16847 default: None,
16848 }))
16849 } else {
16850 expr
16851 }
16852 };
16853 let cast_start = start.map(maybe_cast_ts);
16854 let cast_end = end.map(maybe_cast_ts);
16855 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16856 start: cast_start.map(Box::new),
16857 end: cast_end.map(Box::new),
16858 step: step.map(Box::new),
16859 is_end_exclusive: None,
16860 })))
16861 } else {
16862 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16863 start: start.map(Box::new),
16864 end: end.map(Box::new),
16865 step: step.map(Box::new),
16866 is_end_exclusive: None,
16867 })))
16868 }
16869 }
16870
// TO_JSON(x) -> target-specific JSON serialization (source is Spark/Hive).
// A missing argument falls back to NULL so the rewrite never panics.
"TO_JSON" => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom { name: "JSON".to_string() },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
        }
        DialectType::BigQuery => {
            // BigQuery's string-producing equivalent is TO_JSON_STRING.
            Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT) — force a plain-text result.
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // Default: pass TO_JSON through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
    }
}
16906
// TO_JSON_STRING(x) -> target-specific JSON-to-string form (source is BigQuery).
// Mirrors the TO_JSON arm above for the shared Presto/Trino and DuckDB shapes.
"TO_JSON_STRING" => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Spark-family spells it TO_JSON.
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom { name: "JSON".to_string() },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TO_JSON(x)
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        // Default: keep TO_JSON_STRING unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
    }
}
16946
16947 // SAFE_ADD(x, y) -> SafeAdd expression
16948 "SAFE_ADD" if args.len() == 2 => {
16949 let x = args.remove(0);
16950 let y = args.remove(0);
16951 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
16952 this: Box::new(x),
16953 expression: Box::new(y),
16954 })))
16955 }
16956
16957 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
16958 "SAFE_SUBTRACT" if args.len() == 2 => {
16959 let x = args.remove(0);
16960 let y = args.remove(0);
16961 Ok(Expression::SafeSubtract(Box::new(crate::expressions::SafeSubtract {
16962 this: Box::new(x),
16963 expression: Box::new(y),
16964 })))
16965 }
16966
16967 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
16968 "SAFE_MULTIPLY" if args.len() == 2 => {
16969 let x = args.remove(0);
16970 let y = args.remove(0);
16971 Ok(Expression::SafeMultiply(Box::new(crate::expressions::SafeMultiply {
16972 this: Box::new(x),
16973 expression: Box::new(y),
16974 })))
16975 }
16976
16977 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
16978 "REGEXP_CONTAINS" if args.len() == 2 => {
16979 let str_expr = args.remove(0);
16980 let pattern = args.remove(0);
16981 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
16982 this: str_expr,
16983 pattern,
16984 flags: None,
16985 })))
16986 }
16987
16988 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
16989 "CONTAINS_SUBSTR" if args.len() == 2 => {
16990 let a = args.remove(0);
16991 let b = args.remove(0);
16992 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
16993 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
16994 Ok(Expression::Function(Box::new(Function::new(
16995 "CONTAINS".to_string(), vec![lower_a, lower_b],
16996 ))))
16997 }
16998
16999 // INT64(x) -> CAST(x AS BIGINT)
17000 "INT64" if args.len() == 1 => {
17001 let arg = args.remove(0);
17002 Ok(Expression::Cast(Box::new(Cast {
17003 this: arg,
17004 to: DataType::BigInt { length: None },
17005 trailing_comments: vec![],
17006 double_colon_syntax: false,
17007 format: None,
17008 default: None,
17009 })))
17010 }
17011
17012 // INSTR(str, substr) -> target-specific
17013 "INSTR" if args.len() >= 2 => {
17014 let str_expr = args.remove(0);
17015 let substr = args.remove(0);
17016 if matches!(target, DialectType::Snowflake) {
17017 // CHARINDEX(substr, str)
17018 Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
17019 } else if matches!(target, DialectType::BigQuery) {
17020 // Keep as INSTR
17021 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
17022 } else {
17023 // Default: keep as INSTR
17024 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
17025 }
17026 }
17027
// BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL.
// BigQuery puts the expression first and the unit second; most other engines
// expect the unit (as a string literal) first.
"DATE_TRUNC" if args.len() == 2 => {
    let expr = args.remove(0);
    let unit_expr = args.remove(0);
    // Normalized unit name derived from the unit expression (e.g. "MONTH").
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB | DialectType::Snowflake | DialectType::PostgreSQL
        | DialectType::Presto | DialectType::Trino
        | DialectType::Databricks | DialectType::Spark
        | DialectType::Redshift | DialectType::ClickHouse | DialectType::TSQL => {
            // Standard: DATE_TRUNC('UNIT', expr)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::Literal(Literal::String(unit_str)), expr],
            ))))
        }
        _ => {
            // Keep BigQuery arg order: DATE_TRUNC(expr, unit) — the original
            // unit expression is preserved, not the normalized string.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![expr, unit_expr],
            ))))
        }
    }
}
17054
// TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific truncation.
// Accepts (ts, unit) or (ts, unit, timezone); the unit is normalized to a
// string via get_unit_str.
"TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
    // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
    let ts = args.remove(0);
    let unit_expr = args.remove(0);
    let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
            // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
            // Without timezone for MINUTE+ granularity: just DATE_TRUNC
            //
            // Coarse units are the ones where the timezone shifts the result
            // (a day boundary depends on the zone); finer units do not need it.
            let is_coarse = matches!(unit_str.as_str(), "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR");
            // For DATETIME_TRUNC, cast string args to TIMESTAMP (DATETIME has
            // no zone, so a plain TIMESTAMP cast suffices for literals);
            // everything else goes through the shared TIMESTAMPTZ helper.
            let cast_ts = if name == "DATETIME_TRUNC" {
                match ts {
                    Expression::Literal(Literal::String(ref _s)) => {
                        Expression::Cast(Box::new(Cast {
                            this: ts,
                            to: DataType::Timestamp { precision: None, timezone: false },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    }
                    _ => Self::maybe_cast_ts_to_tz(ts, &name),
                }
            } else {
                Self::maybe_cast_ts_to_tz(ts, &name)
            };

            if let Some(tz_arg) = tz {
                if is_coarse {
                    // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_ts,
                        zone: tz_arg.clone(),
                    }));
                    let date_trunc = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                    )));
                    Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: date_trunc,
                        zone: tz_arg,
                    })))
                } else {
                    // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                    ))))
                }
            } else {
                // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                ))))
            }
        }
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_TRUNC('UNIT', ts) — any timezone arg is
            // not representable here and is dropped.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), ts],
            ))))
        }
        _ => {
            // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
            let unit = Expression::Literal(Literal::String(unit_str));
            let mut date_trunc_args = vec![unit, ts];
            if let Some(tz_arg) = tz {
                date_trunc_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_TRUNC".to_string(), date_trunc_args,
            ))))
        }
    }
}
17134
// TIME(...) rewrites, by arity:
//   TIME(h, m, s)       -> per-target constructor function
//   TIME('string')      -> CAST('string' AS TIME) (TIMESTAMP on Spark)
//   TIME(expr, tz)      -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
//   anything else       -> passed through unchanged
"TIME" => {
    if args.len() == 3 {
        // TIME(h, m, s) constructor
        match target {
            DialectType::TSQL => {
                // TIMEFROMPARTS(h, m, s, 0, 0) — the two appended zeros are the
                // fractional-seconds value and precision arguments.
                args.push(Expression::number(0));
                args.push(Expression::number(0));
                Ok(Expression::Function(Box::new(Function::new("TIMEFROMPARTS".to_string(), args))))
            }
            DialectType::MySQL => {
                Ok(Expression::Function(Box::new(Function::new("MAKETIME".to_string(), args))))
            }
            DialectType::PostgreSQL => {
                Ok(Expression::Function(Box::new(Function::new("MAKE_TIME".to_string(), args))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
        }
    } else if args.len() == 1 {
        let arg = args.remove(0);
        if matches!(target, DialectType::Spark) {
            // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME — Spark has
            // no standalone TIME type)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // Most targets: CAST(x AS TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Time { precision: None, timezone: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else if args.len() == 2 {
        // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
        let expr = args.remove(0);
        let tz = args.remove(0);
        let cast_tstz = Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Timestamp { timezone: true, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
            this: cast_tstz,
            zone: tz,
        }));
        Ok(Expression::Cast(Box::new(Cast {
            this: at_tz,
            to: DataType::Time { precision: None, timezone: false },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else {
        // Zero or 4+ args: emit unchanged.
        Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
    }
}
17205
// DATETIME('string') -> CAST('string' AS TIMESTAMP)
// DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
// DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
// DATETIME(y, m, d, h, min, s) -> target-specific
"DATETIME" => {
    // For BigQuery target: keep DATETIME function but convert TIME literal to
    // CAST, since BQ->BQ round-trips should preserve the DATETIME call itself.
    if matches!(target, DialectType::BigQuery) {
        if args.len() == 2 {
            let has_time_literal = matches!(&args[1], Expression::Literal(Literal::Time(_)));
            if has_time_literal {
                let first = args.remove(0);
                let second = args.remove(0);
                // TIME 'x' literal -> CAST('x' AS TIME) so the generator emits
                // a cast rather than a TIME-literal token.
                let time_as_cast = match second {
                    Expression::Literal(Literal::Time(s)) => Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(s)),
                        to: DataType::Time { precision: None, timezone: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    other => other,
                };
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(), vec![first, time_as_cast],
                ))));
            }
        }
        return Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))));
    }

    if args.len() == 1 {
        // DATETIME('string') -> CAST('string' AS TIMESTAMP)
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        let first = args.remove(0);
        let second = args.remove(0);
        // A TIME literal second argument means date+time composition; any other
        // second argument is treated as a timezone.
        let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
        if is_time_literal {
            // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
            let time_as_string = match second {
                Expression::Literal(Literal::Time(s)) => Expression::Literal(Literal::String(s)),
                other => other,
            };
            let cast_time = Expression::Cast(Box::new(Cast {
                this: time_as_string,
                to: DataType::Time { precision: None, timezone: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let add_expr = Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
            Ok(Expression::Cast(Box::new(Cast {
                this: add_expr,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // DATETIME('string', 'timezone')
            let cast_tstz = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Timestamp { timezone: true, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_tstz,
                zone: second,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else if args.len() >= 3 {
        // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
        // For other targets, use MAKE_TIMESTAMP or similar
        // NOTE(review): only Snowflake is handled; other targets keep the
        // DATETIME call as-is despite the comment above.
        if matches!(target, DialectType::Snowflake) {
            Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
        } else {
            Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
        }
    } else {
        // Zero args: emit unchanged.
        Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
    }
}
17319
// TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
// TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
// (Snowflake uses CONVERT_TIMEZONE instead of AT TIME ZONE for the 2-arg form.)
"TIMESTAMP" => {
    if args.len() == 1 {
        // One arg: cast to a zone-aware timestamp.
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: true, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        let arg = args.remove(0);
        let tz = args.remove(0);
        // Cast to a zone-less TIMESTAMP first, then attach the zone.
        let cast_ts = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        if matches!(target, DialectType::Snowflake) {
            // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(), vec![tz, cast_ts],
            ))))
        } else {
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_ts,
                zone: tz,
            })))
        }
    } else {
        // Zero or 3+ args: emit unchanged.
        Ok(Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), args))))
    }
}
17359
// STRING(x) -> CAST(x AS VARCHAR/TEXT)
// STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
// (Snowflake's 2-arg form uses CONVERT_TIMEZONE instead of two AT TIME ZONEs.)
"STRING" => {
    if args.len() == 1 {
        let arg = args.remove(0);
        // DuckDB prefers TEXT; everyone else gets VARCHAR.
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar { length: None, parenthesized_length: false },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if args.len() == 2 {
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar { length: None, parenthesized_length: false },
        };
        if matches!(target, DialectType::Snowflake) {
            // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
            let convert_tz = Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![Expression::Literal(Literal::String("UTC".to_string())), tz, arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: convert_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        } else {
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
            // The first AT TIME ZONE pins the value to UTC, the second converts
            // it into the requested zone.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let at_utc = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: cast_ts,
                zone: Expression::Literal(Literal::String("UTC".to_string())),
            }));
            let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: at_utc,
                zone: tz,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
    } else {
        // Zero or 3+ args: emit unchanged.
        Ok(Expression::Function(Box::new(Function::new("STRING".to_string(), args))))
    }
}
17429
// UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
// NOTE(review): these arms win over any later arm in this match with the
// same pattern and guard (`match` arms are tried top to bottom).
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // EPOCH returns a DOUBLE in DuckDB, so cast to BIGINT to match
            // BigQuery's INT64 result.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            let epoch = Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![cast_ts])));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            // Seconds-since-epoch expressed as a timestamp difference from
            // the Unix epoch literal.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01 00:00:00+00".to_string())),
                to: DataType::Timestamp { timezone: true, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
                this: Box::new(epoch),
                expression: Box::new(ts),
                unit: Some("SECONDS".to_string()),
            })))
        }
        // Other targets: pass the call through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_SECONDS".to_string(), vec![ts]))))
    }
}

"UNIX_MILLIS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_MS(CAST(ts AS TIMESTAMPTZ)) — DuckDB's EPOCH_MS already
            // yields an integer millisecond count, so no outer cast needed.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![cast_ts]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MILLIS".to_string(), vec![ts]))))
    }
}

"UNIX_MICROS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_US(CAST(ts AS TIMESTAMPTZ)) — microsecond analogue of the
            // EPOCH_MS rewrite above.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            Ok(Expression::Function(Box::new(Function::new("EPOCH_US".to_string(), vec![cast_ts]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MICROS".to_string(), vec![ts]))))
    }
}
17490
17491 // ARRAY_CONCAT -> target-specific
17492 "ARRAY_CONCAT" => {
17493 match target {
17494 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17495 // CONCAT(arr1, arr2, ...)
17496 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17497 }
17498 DialectType::Presto | DialectType::Trino => {
17499 // CONCAT(arr1, arr2, ...)
17500 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17501 }
17502 DialectType::Snowflake => {
17503 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17504 if args.len() == 1 {
17505 // ARRAY_CAT requires 2 args, add empty array as []
17506 let empty_arr = Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
17507 expressions: vec![],
17508 bracket_notation: true,
17509 use_list_keyword: false,
17510 }));
17511 let mut new_args = args;
17512 new_args.push(empty_arr);
17513 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), new_args))))
17514 } else if args.is_empty() {
17515 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17516 } else {
17517 let mut it = args.into_iter().rev();
17518 let mut result = it.next().unwrap();
17519 for arr in it {
17520 result = Expression::Function(Box::new(Function::new(
17521 "ARRAY_CAT".to_string(), vec![arr, result],
17522 )));
17523 }
17524 Ok(result)
17525 }
17526 }
17527 DialectType::PostgreSQL => {
17528 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17529 if args.len() <= 1 {
17530 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17531 } else {
17532 let mut it = args.into_iter().rev();
17533 let mut result = it.next().unwrap();
17534 for arr in it {
17535 result = Expression::Function(Box::new(Function::new(
17536 "ARRAY_CAT".to_string(), vec![arr, result],
17537 )));
17538 }
17539 Ok(result)
17540 }
17541 }
17542 DialectType::Redshift => {
17543 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17544 if args.len() <= 2 {
17545 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17546 } else {
17547 let mut it = args.into_iter().rev();
17548 let mut result = it.next().unwrap();
17549 for arr in it {
17550 result = Expression::Function(Box::new(Function::new(
17551 "ARRAY_CONCAT".to_string(), vec![arr, result],
17552 )));
17553 }
17554 Ok(result)
17555 }
17556 }
17557 DialectType::DuckDB => {
17558 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17559 if args.len() <= 2 {
17560 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17561 } else {
17562 let mut it = args.into_iter().rev();
17563 let mut result = it.next().unwrap();
17564 for arr in it {
17565 result = Expression::Function(Box::new(Function::new(
17566 "ARRAY_CONCAT".to_string(), vec![arr, result],
17567 )));
17568 }
17569 Ok(result)
17570 }
17571 }
17572 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17573 }
17574 }
17575
// ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
"ARRAY_CONCAT_AGG" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Snowflake => {
            // Aggregate the per-row arrays first, then flatten the resulting
            // array-of-arrays to mimic BigQuery's concatenating aggregate.
            let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                this: arg,
                distinct: false,
                filter: None,
                order_by: vec![],
                name: None,
                ignore_nulls: None,
                having_max: None,
                limit: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_FLATTEN".to_string(), vec![array_agg],
            ))))
        }
        // Other targets: keep the call unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT_AGG".to_string(), vec![arg]))))
    }
}
17598
// MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
// BigQuery's hash functions return BYTES; targets whose hash functions
// return hex strings are wrapped in UNHEX (or use a *_BINARY variant) so
// the result type stays binary.
"MD5" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // UNHEX(MD5(x)) — MD5 yields a hex string on these targets.
            let md5 = Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg])));
            Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![md5]))))
        }
        DialectType::Snowflake => {
            // MD5_BINARY(x) — binary-returning variant.
            Ok(Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), vec![arg]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg]))))
    }
}

"SHA1" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // UNHEX(SHA1(x)) — DuckDB's SHA1 returns a hex string.
            let sha1 = Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg])));
            Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha1]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg]))))
    }
}

"SHA256" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // UNHEX(SHA256(x))
            let sha = Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg])));
            Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha]))))
        }
        DialectType::Snowflake => {
            // SHA2_BINARY(x, 256) — digest length selected by second arg.
            Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(256)]))))
        }
        DialectType::Redshift | DialectType::Spark => {
            // SHA2(x, 256)
            Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(256)]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg]))))
    }
}

"SHA512" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Snowflake => {
            // SHA2_BINARY(x, 512)
            Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(512)]))))
        }
        DialectType::Redshift | DialectType::Spark => {
            // SHA2(x, 512)
            Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(512)]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("SHA512".to_string(), vec![arg]))))
    }
}
17662
// REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Check if pattern contains capturing groups (parentheses).
    // NOTE(review): this heuristic is syntactic only — it also fires on
    // non-capturing groups `(?:...)` and escaped `\(`, and only inspects
    // string literals (any non-literal pattern is assumed group-free).
    // Confirm this is acceptable for the supported inputs.
    let has_groups = match &pattern {
        Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB takes an explicit group index: 1 when the pattern has
            // a capturing group (BigQuery returns group 1), else 0 (whole match).
            let group = if has_groups { Expression::number(1) } else { Expression::number(0) };
            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, group]))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            } else {
                // No groups: force group 0 so the whole match is returned.
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(0)]))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino default to the whole match; pass group 1 only when
            // the pattern captures.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(1)]))))
            } else {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // position=1, occurrence=1, 'c' = case-sensitive, group=1.
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![
                    str_expr, pattern, Expression::number(1), Expression::number(1),
                    Expression::Literal(Literal::String("c".to_string())), Expression::number(1),
                ]))))
            } else {
                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
            }
        }
        _ => Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
    }
}
17708
// MOD(x, y) -> x % y for PostgreSQL/DuckDB
"MOD" if args.len() == 2 => {
    match target {
        DialectType::PostgreSQL | DialectType::DuckDB => {
            // Rewrite to the binary % operator, which these targets prefer.
            let x = args.remove(0);
            let y = args.remove(0);
            Ok(Expression::Mod(Box::new(crate::expressions::BinaryOp::new(x, y))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("MOD".to_string(), args))))
    }
}

// CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
"CONCAT" if args.len() > 2 => {
    match target {
        DialectType::DuckDB => {
            // Left-fold the arguments into a chain of || operators:
            // CONCAT(a, b, c) -> (a || b) || c.
            let mut it = args.into_iter();
            let mut result = it.next().unwrap();
            for arg in it {
                result = Expression::DPipe(Box::new(crate::expressions::DPipe { this: Box::new(result), expression: Box::new(arg), safe: None }));
            }
            Ok(result)
        }
        _ => Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
    }
}
17735
// GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
// BigQuery's step argument defaults to INTERVAL 1 DAY; each branch below
// materializes that default when the caller omitted it.
"GENERATE_DATE_ARRAY" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: add default interval if not present
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
        } else {
            Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
        }
    } else if matches!(target, DialectType::DuckDB) {
        // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        // Missing step -> INTERVAL '1' DAY.
        let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String("1".to_string()))),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
        }))));

        // Wrap start/end in CAST(... AS DATE) only for string literals;
        // non-literal expressions are assumed to already be dates.
        let maybe_cast_date = |expr: Expression| -> Expression {
            if matches!(&expr, Expression::Literal(Literal::String(_))) {
                Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                expr
            }
        };
        let cast_start = start.map(maybe_cast_date);
        let cast_end = end.map(maybe_cast_date);

        let gen_series = Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
            start: cast_start.map(Box::new),
            end: cast_end.map(Box::new),
            step: step.map(Box::new),
            is_end_exclusive: None,
        }));

        // Wrap in CAST(... AS DATE[]) so the element type matches BigQuery's
        // ARRAY<DATE> (GENERATE_SERIES on timestamps yields TIMESTAMP[]).
        Ok(Expression::Cast(Box::new(Cast {
            this: gen_series,
            to: DataType::Array { element_type: Box::new(DataType::Date), dimension: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
        // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
        } else {
            Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
        }
    } else {
        // Convert to GenerateSeries for other targets (no DATE casts here,
        // unlike the DuckDB branch above).
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String("1".to_string()))),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
        }))));
        Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
            start: start.map(Box::new),
            end: end.map(Box::new),
            step: step.map(Box::new),
            is_end_exclusive: None,
        })))
    }
}
17824
// PARSE_DATE(format, str) -> target-specific
// BigQuery puts the format first; most targets put the subject first, so
// the argument order is swapped in the rewrites below.
"PARSE_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(STRPTIME(str, duck_format) AS DATE)
            // STRPTIME yields a TIMESTAMP; the cast truncates to DATE.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
            Ok(Expression::Cast(Box::new(Cast {
                this: strptime,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // _POLYGLOT_DATE(str, snowflake_format)
            // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new("_POLYGLOT_DATE".to_string(), vec![str_expr, sf_format]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("PARSE_DATE".to_string(), vec![format, str_expr]))))
    }
}

// PARSE_TIMESTAMP(format, str) -> target-specific
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    // Optional third argument is the timezone.
    let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
    match target {
        DialectType::DuckDB => {
            // NOTE(review): the optional `tz` argument is silently dropped
            // here — STRPTIME has no timezone parameter. Confirm whether the
            // DuckDB rewrite should wrap the result in AT TIME ZONE instead.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
            Ok(strptime)
        }
        _ => {
            // Other targets: reassemble the original argument list.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz { result_args.push(tz_arg); }
            Ok(Expression::Function(Box::new(Function::new("PARSE_TIMESTAMP".to_string(), result_args))))
        }
    }
}
17871
// FORMAT_DATE(format, date) -> target-specific
"FORMAT_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let date_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(date AS DATE), format) — subject/format order is
            // swapped relative to BigQuery, and the subject is forced to DATE.
            let cast_date = Expression::Cast(Box::new(Cast {
                this: date_expr,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_date, format]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATE".to_string(), vec![format, date_expr]))))
    }
}

// FORMAT_DATETIME(format, datetime) -> target-specific
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals: a DATETIME '...'
        // literal is re-expressed as CAST('...' AS DATETIME).
        let norm_dt = match dt_expr {
            Expression::Literal(Literal::Timestamp(s)) => {
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom { name: "DATETIME".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            other => other,
        };
        return Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![norm_format, norm_dt]))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_dt, duck_format]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![format, dt_expr]))))
    }
}
17928
// FORMAT_TIMESTAMP(format, ts) -> target-specific
"FORMAT_TIMESTAMP" if args.len() == 2 => {
    let format = args.remove(0);
    let ts_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
            // The double cast pins the instant to TIMESTAMPTZ first, then
            // strips the zone so STRFTIME formats the local wall-clock value.
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_ts, format]))))
        }
        DialectType::Snowflake => {
            // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
            // Same double-cast pattern as the DuckDB branch; the BigQuery
            // strftime-style format is also translated to Snowflake's syntax.
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp { timezone: false, precision: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![cast_ts, sf_format]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_TIMESTAMP".to_string(), vec![format, ts_expr]))))
    }
}

// UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
"UNIX_DATE" if args.len() == 1 => {
    let date = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // Epoch anchor: CAST('1970-01-01' AS DATE).
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
            // Need to convert DATE literal to CAST
            let norm_date = Self::date_literal_to_cast(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![
                    Expression::Literal(Literal::String("DAY".to_string())),
                    epoch,
                    norm_date,
                ],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("UNIX_DATE".to_string(), vec![date]))))
    }
}
17992
// NOTE(review): three duplicate match arms removed here ("UNIX_SECONDS",
// "UNIX_MILLIS", "UNIX_MICROS", each guarded by `args.len() == 1`).
// Arms with the identical pattern AND identical guard appear earlier in
// this same `match`, so these later copies could never be selected —
// rustc flags them via the `unreachable_patterns` lint. Removing them is
// behavior-preserving. Caveat for the maintainer: the removed copies
// normalized the argument with `Self::ts_literal_to_cast_tz` where the
// surviving earlier arms use `Self::ensure_cast_timestamptz`; if the
// `ts_literal_to_cast_tz` handling (timestamp-literal -> CAST) was the
// intended behavior, fold it into the earlier arms rather than re-adding
// these unreachable duplicates.
18055
// INSTR(str, substr) -> target-specific
"INSTR" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as INSTR
        Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
    } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
        // Snowflake: CHARINDEX(substr, str) - swap args
        let str_expr = args.remove(0);
        let substr = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
    } else {
        // Keep as INSTR for other targets (including Snowflake calls with
        // 3+ args, which CHARINDEX cannot express).
        Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
    }
}

// CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
"CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: always output with parens (function form), keep any timezone arg
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    } else if name == "CURRENT_DATE" && args.len() == 1 {
        // CURRENT_DATE('UTC') - has timezone arg
        let tz_arg = args.remove(0);
        match target {
            DialectType::DuckDB => {
                // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                let ct = Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp { precision: None, sysdate: false });
                let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: ct,
                    zone: tz_arg,
                }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: at_tz,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            DialectType::Snowflake => {
                // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                let ct = Expression::Function(Box::new(Function::new("CURRENT_TIMESTAMP".to_string(), vec![])));
                let convert = Expression::Function(Box::new(Function::new("CONVERT_TIMEZONE".to_string(), vec![tz_arg, ct])));
                Ok(Expression::Cast(Box::new(Cast {
                    this: convert,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            _ => {
                // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                // NOTE(review): AT TIME ZONE applied to a DATE (not a
                // timestamp) is of questionable validity on several of these
                // targets — verify against the generators for this branch.
                let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cd,
                    zone: tz_arg,
                })))
            }
        }
    } else if (name == "CURRENT_TIMESTAMP" || name == "CURRENT_TIME" || name == "CURRENT_DATE") && args.is_empty()
        && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
    {
        // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
        if name == "CURRENT_TIMESTAMP" {
            Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: false,
            }))
        } else if name == "CURRENT_DATE" {
            Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
        } else {
            // CURRENT_TIME
            Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                precision: None,
            }))
        }
    } else {
        // All other targets: keep as function (with parens)
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    }
}
18141
// JSON_QUERY(json, path) -> target-specific
"JSON_QUERY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (single-arrow JSON extraction operator)
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: json_expr,
                path,
                returning: None,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: None,
                quotes_option: None,
                on_scalar_string: false,
                on_error: None,
            })))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(), args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // NOTE(review): JSON_EXTRACT_PATH takes individual path elements
            // ('a', 'b'), not a JSONPath string ('$.a.b'); the BigQuery path
            // argument is passed through unconverted here — confirm a later
            // transform rewrites it, or this output may be invalid.
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH".to_string(), args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("JSON_QUERY".to_string(), args))))
    }
}

// JSON_VALUE_ARRAY(json, path) -> target-specific
"JSON_VALUE_ARRAY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // CAST(json -> path AS TEXT[])
            let json_expr = args.remove(0);
            let path = args.remove(0);
            let arrow = Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: json_expr,
                path,
                returning: None,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: None,
                quotes_option: None,
                on_scalar_string: false,
                on_error: None,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: arrow,
                to: DataType::Array { element_type: Box::new(DataType::Text), dimension: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            let json_expr = args.remove(0);
            let path_expr = args.remove(0);
            // Convert JSON path from $.path to just path — GET_PATH takes a
            // bare dotted path without the leading '$.'.
            let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr {
                let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                Expression::Literal(Literal::String(trimmed.to_string()))
            } else {
                path_expr
            };
            let parse_json = Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), vec![json_expr])));
            let get_path = Expression::Function(Box::new(Function::new("GET_PATH".to_string(), vec![parse_json, sf_path])));
            // TRANSFORM(get_path, x -> CAST(x AS VARCHAR)): stringify each
            // element so the result matches BigQuery's ARRAY<STRING>.
            let cast_expr = Expression::Cast(Box::new(Cast {
                this: Expression::Identifier(Identifier::new("x")),
                to: DataType::VarChar { length: None, parenthesized_length: false },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![Identifier::new("x")],
                body: cast_expr,
                colon: false,
                parameter_types: vec![],
            }));
            Ok(Expression::Function(Box::new(Function::new("TRANSFORM".to_string(), vec![get_path, lambda]))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new("JSON_VALUE_ARRAY".to_string(), args))))
    }
}
18234
18235 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
18236 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
18237 // This is different from Hive/Spark where 3rd arg is "group_index"
18238 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
18239 match target {
18240 DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18241 if args.len() == 2 {
18242 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
18243 args.push(Expression::number(1));
18244 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
18245 } else if args.len() == 3 {
18246 let val = args.remove(0);
18247 let regex = args.remove(0);
18248 let position = args.remove(0);
18249 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
18250 if is_pos_1 {
18251 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
18252 } else {
18253 let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
18254 let nullif_expr = Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])));
18255 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![nullif_expr, regex, Expression::number(1)]))))
18256 }
18257 } else if args.len() == 4 {
18258 let val = args.remove(0);
18259 let regex = args.remove(0);
18260 let position = args.remove(0);
18261 let occurrence = args.remove(0);
18262 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
18263 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
18264 if is_pos_1 && is_occ_1 {
18265 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
18266 } else {
18267 let subject = if is_pos_1 {
18268 val
18269 } else {
18270 let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
18271 Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])))
18272 };
18273 let extract_all = Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![subject, regex, Expression::number(1)])));
18274 Ok(Expression::Function(Box::new(Function::new("ARRAY_EXTRACT".to_string(), vec![extract_all, occurrence]))))
18275 }
18276 } else {
18277 Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18278 }
18279 }
18280 DialectType::Snowflake => {
18281 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
18282 Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), args))))
18283 }
18284 _ => {
18285 // For other targets (Hive/Spark/BigQuery): pass through as-is
18286 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
18287 Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18288 }
18289 }
18290 }
18291
18292 // BigQuery STRUCT(args) -> target-specific struct expression
18293 "STRUCT" => {
18294 // Convert Function args to Struct fields
18295 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
18296 for (i, arg) in args.into_iter().enumerate() {
18297 match arg {
18298 Expression::Alias(a) => {
18299 // Named field: expr AS name
18300 fields.push((Some(a.alias.name.clone()), a.this));
18301 }
18302 other => {
18303 // Unnamed field: for Spark/Hive, keep as None
18304 // For Snowflake, auto-name as _N
18305 // For DuckDB, use column name for column refs, _N for others
18306 if matches!(target, DialectType::Snowflake) {
18307 fields.push((Some(format!("_{}", i)), other));
18308 } else if matches!(target, DialectType::DuckDB) {
18309 let auto_name = match &other {
18310 Expression::Column(col) => col.name.name.clone(),
18311 _ => format!("_{}", i),
18312 };
18313 fields.push((Some(auto_name), other));
18314 } else {
18315 fields.push((None, other));
18316 }
18317 }
18318 }
18319 }
18320
18321 match target {
18322 DialectType::Snowflake => {
18323 // OBJECT_CONSTRUCT('name', value, ...)
18324 let mut oc_args = Vec::new();
18325 for (name, val) in &fields {
18326 if let Some(n) = name {
18327 oc_args.push(Expression::Literal(Literal::String(n.clone())));
18328 oc_args.push(val.clone());
18329 } else {
18330 oc_args.push(val.clone());
18331 }
18332 }
18333 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), oc_args))))
18334 }
18335 DialectType::DuckDB => {
18336 // {'name': value, ...}
18337 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
18338 }
18339 DialectType::Hive => {
18340 // STRUCT(val1, val2, ...) - strip aliases
18341 let hive_fields: Vec<(Option<String>, Expression)> = fields.into_iter().map(|(_, v)| (None, v)).collect();
18342 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields: hive_fields })))
18343 }
18344 DialectType::Spark | DialectType::Databricks => {
18345 // Use Expression::Struct to bypass Spark target transform auto-naming
18346 Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
18347 }
18348 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18349 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
18350 let all_named = !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
18351 let all_types_inferable = all_named && fields.iter().all(|(_, val)| Self::can_infer_presto_type(val));
18352 let row_args: Vec<Expression> = fields.iter().map(|(_, v)| v.clone()).collect();
18353 let row_expr = Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)));
18354 if all_named && all_types_inferable {
18355 // Build ROW type with inferred types
18356 let mut row_type_fields = Vec::new();
18357 for (name, val) in &fields {
18358 if let Some(n) = name {
18359 let type_str = Self::infer_sql_type_for_presto(val);
18360 row_type_fields.push(crate::expressions::StructField::new(
18361 n.clone(),
18362 crate::expressions::DataType::Custom { name: type_str },
18363 ));
18364 }
18365 }
18366 let row_type = crate::expressions::DataType::Struct { fields: row_type_fields, nested: true };
18367 Ok(Expression::Cast(Box::new(Cast {
18368 this: row_expr,
18369 to: row_type,
18370 trailing_comments: Vec::new(),
18371 double_colon_syntax: false,
18372 format: None,
18373 default: None,
18374 })))
18375 } else {
18376 Ok(row_expr)
18377 }
18378 }
18379 _ => {
18380 // Default: keep as STRUCT function with original args
18381 let mut new_args = Vec::new();
18382 for (name, val) in fields {
18383 if let Some(n) = name {
18384 new_args.push(Expression::Alias(Box::new(crate::expressions::Alias::new(
18385 val, Identifier::new(n),
18386 ))));
18387 } else {
18388 new_args.push(val);
18389 }
18390 }
18391 Ok(Expression::Function(Box::new(Function::new("STRUCT".to_string(), new_args))))
18392 }
18393 }
18394 }
18395
18396 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
18397 "ROUND" if args.len() == 3 => {
18398 let x = args.remove(0);
18399 let n = args.remove(0);
18400 let mode = args.remove(0);
18401 // Check if mode is 'ROUND_HALF_EVEN'
18402 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
18403 if is_half_even && matches!(target, DialectType::DuckDB) {
18404 Ok(Expression::Function(Box::new(Function::new("ROUND_EVEN".to_string(), vec![x, n]))))
18405 } else {
18406 // Pass through with all args
18407 Ok(Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![x, n, mode]))))
18408 }
18409 }
18410
18411 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
18412 "MAKE_INTERVAL" => {
18413 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
18414 // The positional args are: year, month
18415 // Named args are: day =>, minute =>, etc.
18416 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
18417 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
18418 // For BigQuery->BigQuery: reorder named args (day before minute)
18419 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
18420 let mut parts: Vec<(String, String)> = Vec::new();
18421 let mut pos_idx = 0;
18422 let pos_units = ["year", "month"];
18423 for arg in &args {
18424 if let Expression::NamedArgument(na) = arg {
18425 // Named arg like minute => 5
18426 let unit = na.name.name.clone();
18427 if let Expression::Literal(Literal::Number(n)) = &na.value {
18428 parts.push((unit, n.clone()));
18429 }
18430 } else if pos_idx < pos_units.len() {
18431 if let Expression::Literal(Literal::Number(n)) = arg {
18432 parts.push((pos_units[pos_idx].to_string(), n.clone()));
18433 }
18434 pos_idx += 1;
18435 }
18436 }
18437 // Don't sort - preserve original argument order
18438 let separator = if matches!(target, DialectType::Snowflake) { ", " } else { " " };
18439 let interval_str = parts.iter()
18440 .map(|(u, v)| format!("{} {}", v, u))
18441 .collect::<Vec<_>>()
18442 .join(separator);
18443 Ok(Expression::Interval(Box::new(crate::expressions::Interval {
18444 this: Some(Expression::Literal(Literal::String(interval_str))),
18445 unit: None,
18446 })))
18447 } else if matches!(target, DialectType::BigQuery) {
18448 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
18449 let mut positional = Vec::new();
18450 let mut named: Vec<(String, Expression, crate::expressions::NamedArgSeparator)> = Vec::new();
18451 let _pos_units = ["year", "month"];
18452 let mut _pos_idx = 0;
18453 for arg in args {
18454 if let Expression::NamedArgument(na) = arg {
18455 named.push((na.name.name.clone(), na.value, na.separator));
18456 } else {
18457 positional.push(arg);
18458 _pos_idx += 1;
18459 }
18460 }
18461 // Sort named args by: day, hour, minute, second
18462 let unit_order = |u: &str| -> usize {
18463 match u.to_lowercase().as_str() {
18464 "day" => 0, "hour" => 1, "minute" => 2, "second" => 3, _ => 4,
18465 }
18466 };
18467 named.sort_by_key(|(u, _, _)| unit_order(u));
18468 let mut result_args = positional;
18469 for (name, value, sep) in named {
18470 result_args.push(Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
18471 name: Identifier::new(&name),
18472 value,
18473 separator: sep,
18474 })));
18475 }
18476 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), result_args))))
18477 } else {
18478 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), args))))
18479 }
18480 }
18481
18482 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
18483 "ARRAY_TO_STRING" if args.len() == 3 => {
18484 let arr = args.remove(0);
18485 let sep = args.remove(0);
18486 let null_text = args.remove(0);
18487 match target {
18488 DialectType::DuckDB => {
18489 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
18490 let _lambda_param = Expression::Identifier(crate::expressions::Identifier::new("x"));
18491 let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
18492 original_name: None,
18493 expressions: vec![
18494 Expression::Identifier(crate::expressions::Identifier::new("x")),
18495 null_text,
18496 ],
18497 }));
18498 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
18499 parameters: vec![crate::expressions::Identifier::new("x")],
18500 body: coalesce,
18501 colon: false,
18502 parameter_types: vec![],
18503 }));
18504 let list_transform = Expression::Function(Box::new(Function::new("LIST_TRANSFORM".to_string(), vec![arr, lambda])));
18505 Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![list_transform, sep]))))
18506 }
18507 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![arr, sep, null_text]))))
18508 }
18509 }
18510
18511 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
18512 "LENGTH" if args.len() == 1 => {
18513 let arg = args.remove(0);
18514 match target {
18515 DialectType::DuckDB => {
18516 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
18517 let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
18518 let blob_cast = Expression::Cast(Box::new(Cast {
18519 this: arg.clone(),
18520 to: DataType::VarBinary { length: None },
18521 trailing_comments: vec![],
18522 double_colon_syntax: false,
18523 format: None,
18524 default: None,
18525 }));
18526 let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
18527 let text_cast = Expression::Cast(Box::new(Cast {
18528 this: arg,
18529 to: DataType::Text,
18530 trailing_comments: vec![],
18531 double_colon_syntax: false,
18532 format: None,
18533 default: None,
18534 }));
18535 let length_text = Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![text_cast])));
18536 Ok(Expression::Case(Box::new(crate::expressions::Case {
18537 operand: Some(typeof_func),
18538 whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
18539 else_: Some(length_text),
18540 })))
18541 }
18542 _ => Ok(Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![arg]))))
18543 }
18544 }
18545
18546 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
18547 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
18548 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
18549 // The args should be [x, fraction] with the null handling stripped
18550 // For DuckDB: QUANTILE_CONT(x, fraction)
18551 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
18552 match target {
18553 DialectType::DuckDB => {
18554 // Strip down to just 2 args, rename to QUANTILE_CONT
18555 let x = args[0].clone();
18556 let frac = args[1].clone();
18557 Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![x, frac]))))
18558 }
18559 _ => Ok(Expression::Function(Box::new(Function::new("PERCENTILE_CONT".to_string(), args))))
18560 }
18561 }
18562
18563 // All others: pass through
18564 _ => Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18565 }
18566 }
18567
18568 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
18569 /// Returns false for column references and other non-literal expressions where the type is unknown.
18570 fn can_infer_presto_type(expr: &Expression) -> bool {
18571 match expr {
18572 Expression::Literal(_) => true,
18573 Expression::Boolean(_) => true,
18574 Expression::Array(_) | Expression::ArrayFunc(_) => true,
18575 Expression::Struct(_) | Expression::StructFunc(_) => true,
18576 Expression::Function(f) => {
18577 let up = f.name.to_uppercase();
18578 up == "STRUCT" || up == "ROW" || up == "CURRENT_DATE" || up == "CURRENT_TIMESTAMP" || up == "NOW"
18579 }
18580 Expression::Cast(_) => true,
18581 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
18582 _ => false,
18583 }
18584 }
18585
18586 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
18587 fn infer_sql_type_for_presto(expr: &Expression) -> String {
18588 use crate::expressions::Literal;
18589 match expr {
18590 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
18591 Expression::Literal(Literal::Number(n)) => {
18592 if n.contains('.') { "DOUBLE".to_string() } else { "INTEGER".to_string() }
18593 }
18594 Expression::Boolean(_) => "BOOLEAN".to_string(),
18595 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
18596 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
18597 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
18598 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
18599 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
18600 Expression::Function(f) => {
18601 let up = f.name.to_uppercase();
18602 if up == "STRUCT" || up == "ROW" { "ROW".to_string() }
18603 else if up == "CURRENT_DATE" { "DATE".to_string() }
18604 else if up == "CURRENT_TIMESTAMP" || up == "NOW" { "TIMESTAMP".to_string() }
18605 else { "VARCHAR".to_string() }
18606 }
18607 Expression::Cast(c) => {
18608 // If already cast, use the target type
18609 Self::data_type_to_presto_string(&c.to)
18610 }
18611 _ => "VARCHAR".to_string(),
18612 }
18613 }
18614
18615 /// Convert a DataType to its Presto/Trino string representation for ROW type
18616 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
18617 use crate::expressions::DataType;
18618 match dt {
18619 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "VARCHAR".to_string(),
18620 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => "INTEGER".to_string(),
18621 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
18622 DataType::Boolean => "BOOLEAN".to_string(),
18623 DataType::Date => "DATE".to_string(),
18624 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
18625 DataType::Struct { fields, .. } => {
18626 let field_strs: Vec<String> = fields.iter().map(|f| {
18627 format!("{} {}", f.name, Self::data_type_to_presto_string(&f.data_type))
18628 }).collect();
18629 format!("ROW({})", field_strs.join(", "))
18630 }
18631 DataType::Array { element_type, .. } => {
18632 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
18633 }
18634 DataType::Custom { name } => {
18635 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
18636 name.clone()
18637 }
18638 _ => "VARCHAR".to_string(),
18639 }
18640 }
18641
18642 /// Convert IntervalUnit to string
18643 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
18644 match unit {
18645 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
18646 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
18647 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
18648 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
18649 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
18650 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
18651 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
18652 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
18653 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
18654 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
18655 }
18656 }
18657
18658 /// Extract unit string from an expression (uppercased)
18659 fn get_unit_str_static(expr: &Expression) -> String {
18660 use crate::expressions::Literal;
18661 match expr {
18662 Expression::Identifier(id) => id.name.to_uppercase(),
18663 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
18664 Expression::Column(col) => col.name.name.to_uppercase(),
18665 Expression::Function(f) => {
18666 let base = f.name.to_uppercase();
18667 if !f.args.is_empty() {
18668 let inner = Self::get_unit_str_static(&f.args[0]);
18669 format!("{}({})", base, inner)
18670 } else {
18671 base
18672 }
18673 }
18674 _ => "DAY".to_string(),
18675 }
18676 }
18677
18678 /// Parse unit string to IntervalUnit
18679 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
18680 match s {
18681 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
18682 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
18683 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
18684 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
18685 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
18686 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
18687 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
18688 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
18689 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
18690 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
18691 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
18692 _ => crate::expressions::IntervalUnit::Day,
18693 }
18694 }
18695
18696 /// Convert expression to simple string for interval building
18697 fn expr_to_string_static(expr: &Expression) -> String {
18698 use crate::expressions::Literal;
18699 match expr {
18700 Expression::Literal(Literal::Number(s)) => s.clone(),
18701 Expression::Literal(Literal::String(s)) => s.clone(),
18702 Expression::Identifier(id) => id.name.clone(),
18703 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
18704 _ => "1".to_string(),
18705 }
18706 }
18707
18708 /// Extract a simple string representation from a literal expression
18709 fn expr_to_string(expr: &Expression) -> String {
18710 use crate::expressions::Literal;
18711 match expr {
18712 Expression::Literal(Literal::Number(s)) => s.clone(),
18713 Expression::Literal(Literal::String(s)) => s.clone(),
18714 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
18715 Expression::Identifier(id) => id.name.clone(),
18716 _ => "1".to_string(),
18717 }
18718 }
18719
18720 /// Quote an interval value expression as a string literal if it's a number (or negated number)
18721 fn quote_interval_val(expr: &Expression) -> Expression {
18722 use crate::expressions::Literal;
18723 match expr {
18724 Expression::Literal(Literal::Number(n)) => {
18725 Expression::Literal(Literal::String(n.clone()))
18726 }
18727 Expression::Literal(Literal::String(_)) => expr.clone(),
18728 Expression::Neg(inner) => {
18729 if let Expression::Literal(Literal::Number(n)) = &inner.this {
18730 Expression::Literal(Literal::String(format!("-{}", n)))
18731 } else {
18732 expr.clone()
18733 }
18734 }
18735 _ => expr.clone(),
18736 }
18737 }
18738
18739 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
18740 fn timestamp_string_has_timezone(ts: &str) -> bool {
18741 let trimmed = ts.trim();
18742 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
18743 if let Some(last_space) = trimmed.rfind(' ') {
18744 let suffix = &trimmed[last_space + 1..];
18745 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
18746 let rest = &suffix[1..];
18747 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
18748 return true;
18749 }
18750 }
18751 }
18752 // Check for named timezone abbreviations
18753 let ts_lower = trimmed.to_lowercase();
18754 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
18755 for abbrev in &tz_abbrevs {
18756 if ts_lower.ends_with(abbrev) {
18757 return true;
18758 }
18759 }
18760 false
18761 }
18762
18763 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
18764 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
18765 use crate::expressions::{Cast, DataType, Literal};
18766 match expr {
18767 Expression::Literal(Literal::Timestamp(s)) => {
18768 let tz = func_name.starts_with("TIMESTAMP");
18769 Expression::Cast(Box::new(Cast {
18770 this: Expression::Literal(Literal::String(s)),
18771 to: if tz {
18772 DataType::Timestamp { timezone: true, precision: None }
18773 } else {
18774 DataType::Timestamp { timezone: false, precision: None }
18775 },
18776 trailing_comments: vec![],
18777 double_colon_syntax: false,
18778 format: None,
18779 default: None,
18780 }))
18781 }
18782 other => other,
18783 }
18784 }
18785
18786 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
18787 fn maybe_cast_ts(expr: Expression) -> Expression {
18788 use crate::expressions::{Cast, DataType, Literal};
18789 match expr {
18790 Expression::Literal(Literal::Timestamp(s)) => {
18791 Expression::Cast(Box::new(Cast {
18792 this: Expression::Literal(Literal::String(s)),
18793 to: DataType::Timestamp { timezone: false, precision: None },
18794 trailing_comments: vec![],
18795 double_colon_syntax: false,
18796 format: None,
18797 default: None,
18798 }))
18799 }
18800 other => other,
18801 }
18802 }
18803
18804 /// Convert DATE 'x' literal to CAST('x' AS DATE)
18805 fn date_literal_to_cast(expr: Expression) -> Expression {
18806 use crate::expressions::{Cast, DataType, Literal};
18807 match expr {
18808 Expression::Literal(Literal::Date(s)) => {
18809 Expression::Cast(Box::new(Cast {
18810 this: Expression::Literal(Literal::String(s)),
18811 to: DataType::Date,
18812 trailing_comments: vec![],
18813 double_colon_syntax: false,
18814 format: None,
18815 default: None,
18816 }))
18817 }
18818 other => other,
18819 }
18820 }
18821
18822 /// Ensure an expression that should be a date is CAST(... AS DATE).
18823 /// Handles both DATE literals and string literals that look like dates.
18824 fn ensure_cast_date(expr: Expression) -> Expression {
18825 use crate::expressions::{Cast, DataType, Literal};
18826 match expr {
18827 Expression::Literal(Literal::Date(s)) => {
18828 Expression::Cast(Box::new(Cast {
18829 this: Expression::Literal(Literal::String(s)),
18830 to: DataType::Date,
18831 trailing_comments: vec![],
18832 double_colon_syntax: false,
18833 format: None,
18834 default: None,
18835 }))
18836 }
18837 Expression::Literal(Literal::String(ref _s)) => {
18838 // String literal that should be a date -> CAST('s' AS DATE)
18839 Expression::Cast(Box::new(Cast {
18840 this: expr,
18841 to: DataType::Date,
18842 trailing_comments: vec![],
18843 double_colon_syntax: false,
18844 format: None,
18845 default: None,
18846 }))
18847 }
18848 // Already a CAST or other expression -> leave as-is
18849 other => other,
18850 }
18851 }
18852
18853 /// Force CAST(expr AS DATE) for any expression (not just literals)
18854 /// Skips if the expression is already a CAST to DATE
18855 fn force_cast_date(expr: Expression) -> Expression {
18856 use crate::expressions::{Cast, DataType};
18857 // If it's already a CAST to DATE, don't double-wrap
18858 if let Expression::Cast(ref c) = expr {
18859 if matches!(c.to, DataType::Date) {
18860 return expr;
18861 }
18862 }
18863 Expression::Cast(Box::new(Cast {
18864 this: expr,
18865 to: DataType::Date,
18866 trailing_comments: vec![],
18867 double_colon_syntax: false,
18868 format: None,
18869 default: None,
18870 }))
18871 }
18872
    /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
    /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic,
    /// so it survives that rewrite as a sentinel.
    /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
    const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
18877
18878 fn ensure_to_date_preserved(expr: Expression) -> Expression {
18879 use crate::expressions::{Literal, Function};
18880 if matches!(expr, Expression::Literal(Literal::String(_))) {
18881 Expression::Function(Box::new(Function::new(Self::PRESERVED_TO_DATE.to_string(), vec![expr])))
18882 } else {
18883 expr
18884 }
18885 }
18886
18887 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
18888 fn try_cast_date(expr: Expression) -> Expression {
18889 use crate::expressions::{Cast, DataType};
18890 Expression::TryCast(Box::new(Cast {
18891 this: expr,
18892 to: DataType::Date,
18893 trailing_comments: vec![],
18894 double_colon_syntax: false,
18895 format: None,
18896 default: None,
18897 }))
18898 }
18899
18900 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
18901 fn double_cast_timestamp_date(expr: Expression) -> Expression {
18902 use crate::expressions::{Cast, DataType};
18903 let inner = Expression::Cast(Box::new(Cast {
18904 this: expr,
18905 to: DataType::Timestamp { timezone: false, precision: None },
18906 trailing_comments: vec![],
18907 double_colon_syntax: false,
18908 format: None,
18909 default: None,
18910 }));
18911 Expression::Cast(Box::new(Cast {
18912 this: inner,
18913 to: DataType::Date,
18914 trailing_comments: vec![],
18915 double_colon_syntax: false,
18916 format: None,
18917 default: None,
18918 }))
18919 }
18920
18921 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
18922 fn double_cast_datetime_date(expr: Expression) -> Expression {
18923 use crate::expressions::{Cast, DataType};
18924 let inner = Expression::Cast(Box::new(Cast {
18925 this: expr,
18926 to: DataType::Custom { name: "DATETIME".to_string() },
18927 trailing_comments: vec![],
18928 double_colon_syntax: false,
18929 format: None,
18930 default: None,
18931 }));
18932 Expression::Cast(Box::new(Cast {
18933 this: inner,
18934 to: DataType::Date,
18935 trailing_comments: vec![],
18936 double_colon_syntax: false,
18937 format: None,
18938 default: None,
18939 }))
18940 }
18941
18942 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
18943 fn double_cast_datetime2_date(expr: Expression) -> Expression {
18944 use crate::expressions::{Cast, DataType};
18945 let inner = Expression::Cast(Box::new(Cast {
18946 this: expr,
18947 to: DataType::Custom { name: "DATETIME2".to_string() },
18948 trailing_comments: vec![],
18949 double_colon_syntax: false,
18950 format: None,
18951 default: None,
18952 }));
18953 Expression::Cast(Box::new(Cast {
18954 this: inner,
18955 to: DataType::Date,
18956 trailing_comments: vec![],
18957 double_colon_syntax: false,
18958 format: None,
18959 default: None,
18960 }))
18961 }
18962
18963 /// Convert Hive/Java-style date format strings to C-style (strftime) format
18964 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
18965 fn hive_format_to_c_format(fmt: &str) -> String {
18966 let mut result = String::new();
18967 let chars: Vec<char> = fmt.chars().collect();
18968 let mut i = 0;
18969 while i < chars.len() {
18970 match chars[i] {
18971 'y' => {
18972 let mut count = 0;
18973 while i < chars.len() && chars[i] == 'y' { count += 1; i += 1; }
18974 if count >= 4 { result.push_str("%Y"); }
18975 else if count == 2 { result.push_str("%y"); }
18976 else { result.push_str("%Y"); }
18977 }
18978 'M' => {
18979 let mut count = 0;
18980 while i < chars.len() && chars[i] == 'M' { count += 1; i += 1; }
18981 if count >= 3 { result.push_str("%b"); }
18982 else if count == 2 { result.push_str("%m"); }
18983 else { result.push_str("%m"); }
18984 }
18985 'd' => {
18986 let mut _count = 0;
18987 while i < chars.len() && chars[i] == 'd' { _count += 1; i += 1; }
18988 result.push_str("%d");
18989 }
18990 'H' => {
18991 let mut _count = 0;
18992 while i < chars.len() && chars[i] == 'H' { _count += 1; i += 1; }
18993 result.push_str("%H");
18994 }
18995 'h' => {
18996 let mut _count = 0;
18997 while i < chars.len() && chars[i] == 'h' { _count += 1; i += 1; }
18998 result.push_str("%I");
18999 }
19000 'm' => {
19001 let mut _count = 0;
19002 while i < chars.len() && chars[i] == 'm' { _count += 1; i += 1; }
19003 result.push_str("%M");
19004 }
19005 's' => {
19006 let mut _count = 0;
19007 while i < chars.len() && chars[i] == 's' { _count += 1; i += 1; }
19008 result.push_str("%S");
19009 }
19010 'S' => {
19011 // Fractional seconds - skip
19012 while i < chars.len() && chars[i] == 'S' { i += 1; }
19013 result.push_str("%f");
19014 }
19015 'a' => {
19016 // AM/PM
19017 while i < chars.len() && chars[i] == 'a' { i += 1; }
19018 result.push_str("%p");
19019 }
19020 'E' => {
19021 let mut count = 0;
19022 while i < chars.len() && chars[i] == 'E' { count += 1; i += 1; }
19023 if count >= 4 { result.push_str("%A"); }
19024 else { result.push_str("%a"); }
19025 }
19026 '\'' => {
19027 // Quoted literal text - pass through the quotes and content
19028 result.push('\'');
19029 i += 1;
19030 while i < chars.len() && chars[i] != '\'' {
19031 result.push(chars[i]);
19032 i += 1;
19033 }
19034 if i < chars.len() { result.push('\''); i += 1; }
19035 }
19036 c => {
19037 result.push(c);
19038 i += 1;
19039 }
19040 }
19041 }
19042 result
19043 }
19044
19045 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
19046 fn hive_format_to_presto_format(fmt: &str) -> String {
19047 let c_fmt = Self::hive_format_to_c_format(fmt);
19048 // Presto uses %T for HH:MM:SS
19049 c_fmt.replace("%H:%M:%S", "%T")
19050 }
19051
19052 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
19053 fn ensure_cast_timestamp(expr: Expression) -> Expression {
19054 use crate::expressions::{Cast, DataType, Literal};
19055 match expr {
19056 Expression::Literal(Literal::Timestamp(s)) => {
19057 Expression::Cast(Box::new(Cast {
19058 this: Expression::Literal(Literal::String(s)),
19059 to: DataType::Timestamp { timezone: false, precision: None },
19060 trailing_comments: vec![],
19061 double_colon_syntax: false,
19062 format: None,
19063 default: None,
19064 }))
19065 }
19066 Expression::Literal(Literal::String(ref _s)) => {
19067 Expression::Cast(Box::new(Cast {
19068 this: expr,
19069 to: DataType::Timestamp { timezone: false, precision: None },
19070 trailing_comments: vec![],
19071 double_colon_syntax: false,
19072 format: None,
19073 default: None,
19074 }))
19075 }
19076 Expression::Literal(Literal::Datetime(s)) => {
19077 Expression::Cast(Box::new(Cast {
19078 this: Expression::Literal(Literal::String(s)),
19079 to: DataType::Timestamp { timezone: false, precision: None },
19080 trailing_comments: vec![],
19081 double_colon_syntax: false,
19082 format: None,
19083 default: None,
19084 }))
19085 }
19086 other => other,
19087 }
19088 }
19089
19090 /// Force CAST to TIMESTAMP for any expression (not just literals)
19091 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
19092 fn force_cast_timestamp(expr: Expression) -> Expression {
19093 use crate::expressions::{Cast, DataType};
19094 // Don't double-wrap if already a CAST to TIMESTAMP
19095 if let Expression::Cast(ref c) = expr {
19096 if matches!(c.to, DataType::Timestamp { .. }) {
19097 return expr;
19098 }
19099 }
19100 Expression::Cast(Box::new(Cast {
19101 this: expr,
19102 to: DataType::Timestamp { timezone: false, precision: None },
19103 trailing_comments: vec![],
19104 double_colon_syntax: false,
19105 format: None,
19106 default: None,
19107 }))
19108 }
19109
19110 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
19111 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
19112 use crate::expressions::{Cast, DataType, Literal};
19113 match expr {
19114 Expression::Literal(Literal::Timestamp(s)) => {
19115 Expression::Cast(Box::new(Cast {
19116 this: Expression::Literal(Literal::String(s)),
19117 to: DataType::Timestamp { timezone: true, precision: None },
19118 trailing_comments: vec![],
19119 double_colon_syntax: false,
19120 format: None,
19121 default: None,
19122 }))
19123 }
19124 Expression::Literal(Literal::String(ref _s)) => {
19125 Expression::Cast(Box::new(Cast {
19126 this: expr,
19127 to: DataType::Timestamp { timezone: true, precision: None },
19128 trailing_comments: vec![],
19129 double_colon_syntax: false,
19130 format: None,
19131 default: None,
19132 }))
19133 }
19134 Expression::Literal(Literal::Datetime(s)) => {
19135 Expression::Cast(Box::new(Cast {
19136 this: Expression::Literal(Literal::String(s)),
19137 to: DataType::Timestamp { timezone: true, precision: None },
19138 trailing_comments: vec![],
19139 double_colon_syntax: false,
19140 format: None,
19141 default: None,
19142 }))
19143 }
19144 other => other,
19145 }
19146 }
19147
19148 /// Ensure expression is CAST to DATETIME (for BigQuery)
19149 fn ensure_cast_datetime(expr: Expression) -> Expression {
19150 use crate::expressions::{Cast, DataType, Literal};
19151 match expr {
19152 Expression::Literal(Literal::String(ref _s)) => {
19153 Expression::Cast(Box::new(Cast {
19154 this: expr,
19155 to: DataType::Custom { name: "DATETIME".to_string() },
19156 trailing_comments: vec![],
19157 double_colon_syntax: false,
19158 format: None,
19159 default: None,
19160 }))
19161 }
19162 other => other,
19163 }
19164 }
19165
19166 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
19167 fn force_cast_datetime(expr: Expression) -> Expression {
19168 use crate::expressions::{Cast, DataType};
19169 if let Expression::Cast(ref c) = expr {
19170 if let DataType::Custom { ref name } = c.to {
19171 if name.eq_ignore_ascii_case("DATETIME") {
19172 return expr;
19173 }
19174 }
19175 }
19176 Expression::Cast(Box::new(Cast {
19177 this: expr,
19178 to: DataType::Custom { name: "DATETIME".to_string() },
19179 trailing_comments: vec![],
19180 double_colon_syntax: false,
19181 format: None,
19182 default: None,
19183 }))
19184 }
19185
19186 /// Ensure expression is CAST to DATETIME2 (for TSQL)
19187 fn ensure_cast_datetime2(expr: Expression) -> Expression {
19188 use crate::expressions::{Cast, DataType, Literal};
19189 match expr {
19190 Expression::Literal(Literal::String(ref _s)) => {
19191 Expression::Cast(Box::new(Cast {
19192 this: expr,
19193 to: DataType::Custom { name: "DATETIME2".to_string() },
19194 trailing_comments: vec![],
19195 double_colon_syntax: false,
19196 format: None,
19197 default: None,
19198 }))
19199 }
19200 other => other,
19201 }
19202 }
19203
19204 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
19205 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
19206 use crate::expressions::{Cast, DataType, Literal};
19207 match expr {
19208 Expression::Literal(Literal::Timestamp(s)) => {
19209 Expression::Cast(Box::new(Cast {
19210 this: Expression::Literal(Literal::String(s)),
19211 to: DataType::Timestamp { timezone: true, precision: None },
19212 trailing_comments: vec![],
19213 double_colon_syntax: false,
19214 format: None,
19215 default: None,
19216 }))
19217 }
19218 other => other,
19219 }
19220 }
19221
19222 /// Convert BigQuery format string to Snowflake format string
19223 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
19224 use crate::expressions::Literal;
19225 if let Expression::Literal(Literal::String(s)) = format_expr {
19226 let sf = s
19227 .replace("%Y", "yyyy")
19228 .replace("%m", "mm")
19229 .replace("%d", "DD")
19230 .replace("%H", "HH24")
19231 .replace("%M", "MI")
19232 .replace("%S", "SS")
19233 .replace("%b", "mon")
19234 .replace("%B", "Month")
19235 .replace("%e", "FMDD");
19236 Expression::Literal(Literal::String(sf))
19237 } else {
19238 format_expr.clone()
19239 }
19240 }
19241
19242 /// Convert BigQuery format string to DuckDB format string
19243 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
19244 use crate::expressions::Literal;
19245 if let Expression::Literal(Literal::String(s)) = format_expr {
19246 let duck = s
19247 .replace("%T", "%H:%M:%S")
19248 .replace("%F", "%Y-%m-%d")
19249 .replace("%D", "%m/%d/%y")
19250 .replace("%x", "%m/%d/%y")
19251 .replace("%c", "%a %b %-d %H:%M:%S %Y")
19252 .replace("%e", "%-d")
19253 .replace("%E6S", "%S.%f");
19254 Expression::Literal(Literal::String(duck))
19255 } else {
19256 format_expr.clone()
19257 }
19258 }
19259
19260 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
19261 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
19262 use crate::expressions::Literal;
19263 if let Expression::Literal(Literal::String(s)) = format_expr {
19264 // Replace format elements from longest to shortest to avoid partial matches
19265 let result = s
19266 .replace("YYYYMMDD", "%Y%m%d")
19267 .replace("YYYY", "%Y")
19268 .replace("YY", "%y")
19269 .replace("MONTH", "%B")
19270 .replace("MON", "%b")
19271 .replace("MM", "%m")
19272 .replace("DD", "%d")
19273 .replace("HH24", "%H")
19274 .replace("HH12", "%I")
19275 .replace("HH", "%I")
19276 .replace("MI", "%M")
19277 .replace("SSTZH", "%S%z")
19278 .replace("SS", "%S")
19279 .replace("TZH", "%z");
19280 Expression::Literal(Literal::String(result))
19281 } else {
19282 format_expr.clone()
19283 }
19284 }
19285
19286 /// Normalize BigQuery format strings for BQ->BQ output
19287 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
19288 use crate::expressions::Literal;
19289 if let Expression::Literal(Literal::String(s)) = format_expr {
19290 let norm = s
19291 .replace("%H:%M:%S", "%T")
19292 .replace("%x", "%D");
19293 Expression::Literal(Literal::String(norm))
19294 } else {
19295 format_expr.clone()
19296 }
19297 }
19298}
19299
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!("postgres".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("postgresql".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!("bigquery".parse::<DialectType>().unwrap(), DialectType::BigQuery);
    }

    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT 1", DialectType::PostgreSQL).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::MySQL).unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    #[test]
    fn test_get_path_duckdb() {
        // Exercise Snowflake's `:` JSON path operator and GET_PATH step by step.
        // The exact output is dialect-version dependent, so we assert that each
        // stage produces non-empty SQL and print the results for inspection
        // (visible with `cargo test -- --nocapture`).
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake.transpile_to(
            "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
        assert!(!result_sf_sf[0].is_empty(), "Snowflake->Snowflake produced empty SQL");

        // Step 2: DuckDB target
        let result_sf_dk = snowflake.transpile_to(
            "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
            DialectType::DuckDB,
        ).unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
        assert!(!result_sf_dk[0].is_empty(), "Snowflake->DuckDB produced empty SQL");

        // Step 3: GET_PATH directly
        let result_gp = snowflake.transpile_to(
            "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
            DialectType::DuckDB,
        ).unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
        assert!(!result_gp[0].is_empty(), "explicit GET_PATH produced empty SQL");
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::Presto).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(result[0].contains("ARRAY<DATE>"), "transpile_to: Expected ARRAY<DATE>, got: {}", result[0]);

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(output.contains("ARRAY<DATE>"), "identity path: Expected ARRAY<DATE>, got: {}", output);
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a NOT BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect.transpile_to("SELECT * FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert!(result[0].contains("BETWEEN"), "BETWEEN should be preserved in SELECT");
    }

    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}", result.err());
    }

    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful parse of COUNT_IF(x), got error: {:?}", result.err());
    }

    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(result.is_ok(), "Expected successful transpile, got error: {:?}", result.err());
    }

    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("x # y", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "x # y");
    }

    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

}