// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., MySQL and BigQuery quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic;
23mod postgres;
24mod mysql;
25mod bigquery;
26mod snowflake;
27mod duckdb;
28mod tsql;
29mod oracle;
30mod hive;
31mod spark;
32mod sqlite;
33mod presto;
34mod trino;
35mod redshift;
36mod clickhouse;
37mod databricks;
38mod athena;
39mod teradata;
40mod doris;
41mod starrocks;
42mod materialize;
43mod risingwave;
44mod singlestore;
45mod cockroachdb;
46mod tidb;
47mod druid;
48mod solr;
49mod tableau;
50mod dune;
51mod fabric;
52mod drill;
53mod dremio;
54mod exasol;
55
56pub use generic::GenericDialect;
57pub use postgres::PostgresDialect;
58pub use mysql::MySQLDialect;
59pub use bigquery::BigQueryDialect;
60pub use snowflake::SnowflakeDialect;
61pub use duckdb::DuckDBDialect;
62pub use tsql::TSQLDialect;
63pub use oracle::OracleDialect;
64pub use hive::HiveDialect;
65pub use spark::SparkDialect;
66pub use sqlite::SQLiteDialect;
67pub use presto::PrestoDialect;
68pub use trino::TrinoDialect;
69pub use redshift::RedshiftDialect;
70pub use clickhouse::ClickHouseDialect;
71pub use databricks::DatabricksDialect;
72pub use athena::AthenaDialect;
73pub use teradata::TeradataDialect;
74pub use doris::DorisDialect;
75pub use starrocks::StarRocksDialect;
76pub use materialize::MaterializeDialect;
77pub use risingwave::RisingWaveDialect;
78pub use singlestore::SingleStoreDialect;
79pub use cockroachdb::CockroachDBDialect;
80pub use tidb::TiDBDialect;
81pub use druid::DruidDialect;
82pub use solr::SolrDialect;
83pub use tableau::TableauDialect;
84pub use dune::DuneDialect;
85pub use fabric::FabricDialect;
86pub use drill::DrillDialect;
87pub use dremio::DremioDialect;
88pub use exasol::ExasolDialect;
89
90use crate::error::Result;
91use crate::expressions::{Expression, FunctionBody};
92use crate::generator::{Generator, GeneratorConfig};
93use crate::parser::Parser;
94use crate::tokens::{Tokenizer, TokenizerConfig};
95use serde::{Deserialize, Serialize};
96
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to
/// [`TSQL`](DialectType::TSQL)). Serde serializes each variant as its lowercased
/// variant name, matching the canonical name accepted by [`FromStr`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
}
175
176impl Default for DialectType {
177 fn default() -> Self {
178 DialectType::Generic
179 }
180}
181
182impl std::fmt::Display for DialectType {
183 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
184 match self {
185 DialectType::Generic => write!(f, "generic"),
186 DialectType::PostgreSQL => write!(f, "postgresql"),
187 DialectType::MySQL => write!(f, "mysql"),
188 DialectType::BigQuery => write!(f, "bigquery"),
189 DialectType::Snowflake => write!(f, "snowflake"),
190 DialectType::DuckDB => write!(f, "duckdb"),
191 DialectType::SQLite => write!(f, "sqlite"),
192 DialectType::Hive => write!(f, "hive"),
193 DialectType::Spark => write!(f, "spark"),
194 DialectType::Trino => write!(f, "trino"),
195 DialectType::Presto => write!(f, "presto"),
196 DialectType::Redshift => write!(f, "redshift"),
197 DialectType::TSQL => write!(f, "tsql"),
198 DialectType::Oracle => write!(f, "oracle"),
199 DialectType::ClickHouse => write!(f, "clickhouse"),
200 DialectType::Databricks => write!(f, "databricks"),
201 DialectType::Athena => write!(f, "athena"),
202 DialectType::Teradata => write!(f, "teradata"),
203 DialectType::Doris => write!(f, "doris"),
204 DialectType::StarRocks => write!(f, "starrocks"),
205 DialectType::Materialize => write!(f, "materialize"),
206 DialectType::RisingWave => write!(f, "risingwave"),
207 DialectType::SingleStore => write!(f, "singlestore"),
208 DialectType::CockroachDB => write!(f, "cockroachdb"),
209 DialectType::TiDB => write!(f, "tidb"),
210 DialectType::Druid => write!(f, "druid"),
211 DialectType::Solr => write!(f, "solr"),
212 DialectType::Tableau => write!(f, "tableau"),
213 DialectType::Dune => write!(f, "dune"),
214 DialectType::Fabric => write!(f, "fabric"),
215 DialectType::Drill => write!(f, "drill"),
216 DialectType::Dremio => write!(f, "dremio"),
217 DialectType::Exasol => write!(f, "exasol"),
218 }
219 }
220}
221
222impl std::str::FromStr for DialectType {
223 type Err = crate::error::Error;
224
225 fn from_str(s: &str) -> Result<Self> {
226 match s.to_lowercase().as_str() {
227 "generic" | "" => Ok(DialectType::Generic),
228 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
229 "mysql" => Ok(DialectType::MySQL),
230 "bigquery" => Ok(DialectType::BigQuery),
231 "snowflake" => Ok(DialectType::Snowflake),
232 "duckdb" => Ok(DialectType::DuckDB),
233 "sqlite" => Ok(DialectType::SQLite),
234 "hive" => Ok(DialectType::Hive),
235 "spark" | "spark2" => Ok(DialectType::Spark),
236 "trino" => Ok(DialectType::Trino),
237 "presto" => Ok(DialectType::Presto),
238 "redshift" => Ok(DialectType::Redshift),
239 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
240 "oracle" => Ok(DialectType::Oracle),
241 "clickhouse" => Ok(DialectType::ClickHouse),
242 "databricks" => Ok(DialectType::Databricks),
243 "athena" => Ok(DialectType::Athena),
244 "teradata" => Ok(DialectType::Teradata),
245 "doris" => Ok(DialectType::Doris),
246 "starrocks" => Ok(DialectType::StarRocks),
247 "materialize" => Ok(DialectType::Materialize),
248 "risingwave" => Ok(DialectType::RisingWave),
249 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
250 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
251 "tidb" => Ok(DialectType::TiDB),
252 "druid" => Ok(DialectType::Druid),
253 "solr" => Ok(DialectType::Solr),
254 "tableau" => Ok(DialectType::Tableau),
255 "dune" => Ok(DialectType::Dune),
256 "fabric" => Ok(DialectType::Fabric),
257 "drill" => Ok(DialectType::Drill),
258 "dremio" => Ok(DialectType::Dremio),
259 "exasol" => Ok(DialectType::Exasol),
260 _ => Err(crate::error::Error::parse(format!("Unknown dialect: {}", s))),
261 }
262 }
263}
264
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// SQL tokenizer configuration ([`TokenizerConfig::default`]).
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// generic SQL generator configuration ([`GeneratorConfig::default`]).
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    ///
    /// # Errors
    ///
    /// Implementations may fail when an expression cannot be represented in this
    /// dialect; the default pass-through never fails.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    ///
    /// # Errors
    ///
    /// Implementations may fail if a structural rewrite cannot be applied; the
    /// default pass-through never fails.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
327
328/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
329/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
330///
331/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
332/// and then nested element/field types are recursed into. This ensures that dialect-level
333/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
334fn transform_data_type_recursive<F>(dt: crate::expressions::DataType, transform_fn: &F) -> Result<crate::expressions::DataType>
335where
336 F: Fn(Expression) -> Result<Expression>,
337{
338 use crate::expressions::DataType;
339 // First, transform the outermost type through the expression system
340 let dt_expr = transform_fn(Expression::DataType(dt))?;
341 let dt = match dt_expr {
342 Expression::DataType(d) => d,
343 _ => return Ok(match dt_expr { _ => DataType::Custom { name: "UNKNOWN".to_string() } }),
344 };
345 // Then recurse into nested types
346 match dt {
347 DataType::Array { element_type, dimension } => {
348 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
349 Ok(DataType::Array { element_type: Box::new(inner), dimension })
350 }
351 DataType::List { element_type } => {
352 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
353 Ok(DataType::List { element_type: Box::new(inner) })
354 }
355 DataType::Struct { fields, nested } => {
356 let mut new_fields = Vec::new();
357 for mut field in fields {
358 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
359 new_fields.push(field);
360 }
361 Ok(DataType::Struct { fields: new_fields, nested })
362 }
363 DataType::Map { key_type, value_type } => {
364 let k = transform_data_type_recursive(*key_type, transform_fn)?;
365 let v = transform_data_type_recursive(*value_type, transform_fn)?;
366 Ok(DataType::Map { key_type: Box::new(k), value_type: Box::new(v) })
367 }
368 other => Ok(other),
369 }
370}
371
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// Both engines use `%`-prefixed directives, but some specifiers differ (e.g.
/// DuckDB's `%M` means minutes while Presto spells that `%i`). Multi-character
/// directives are temporarily hidden behind opaque `\x01`-delimited markers so
/// the single-character replacements cannot corrupt them mid-pattern.
fn duckdb_to_presto_format(fmt: &str) -> String {
    // (duckdb pattern, opaque marker, presto replacement), applied in order;
    // order matters so longer patterns are protected before shorter rewrites.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: hide the multi-character patterns behind markers.
    let mut out = fmt.to_string();
    for (pattern, marker, _) in PROTECTED {
        out = out.replace(pattern, marker);
    }

    // Pass 2: rewrite the single-character specifiers that differ.
    out = out.replace("%M", "%i").replace("%S", "%s");

    // Pass 3: materialize each marker as its Presto equivalent.
    for (_, marker, presto) in PROTECTED {
        out = out.replace(marker, presto);
    }
    out
}
396
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery understands a mix of strftime-like directives, including the
/// shorthands `%F` (date) and `%T` (time). Composite patterns are rewritten
/// before their shorter sub-patterns so the longer match wins.
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .into_iter()
    .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
408
409/// Applies a transform function bottom-up through an entire expression tree.
410///
411/// This is the core tree-rewriting engine used by the dialect system. It performs
412/// a post-order (children-first) traversal: for each node, all children are recursively
413/// transformed before the node itself is passed to `transform_fn`. This bottom-up
414/// strategy means that when `transform_fn` sees a node, its children have already
415/// been rewritten, which simplifies pattern matching on sub-expressions.
416///
417/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
418/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
419/// function calls, CASE expressions, date/time functions, and more.
420///
421/// # Arguments
422///
423/// * `expr` - The root expression to transform (consumed).
424/// * `transform_fn` - A closure that receives each expression node (after its children
425/// have been transformed) and returns a possibly-rewritten expression.
426///
427/// # Errors
428///
429/// Returns an error if `transform_fn` returns an error for any node.
430pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
431where
432 F: Fn(Expression) -> Result<Expression>,
433{
434 use crate::expressions::BinaryOp;
435
436 // Helper macro to transform binary ops with Box<BinaryOp>
437 macro_rules! transform_binary {
438 ($variant:ident, $op:expr) => {{
439 let left = transform_recursive($op.left, transform_fn)?;
440 let right = transform_recursive($op.right, transform_fn)?;
441 Expression::$variant(Box::new(BinaryOp {
442 left,
443 right,
444 left_comments: $op.left_comments,
445 operator_comments: $op.operator_comments,
446 trailing_comments: $op.trailing_comments,
447 }))
448 }};
449 }
450
451 // First recursively transform children, then apply the transform function
452 let expr = match expr {
453 Expression::Select(mut select) => {
454 select.expressions = select
455 .expressions
456 .into_iter()
457 .map(|e| transform_recursive(e, transform_fn))
458 .collect::<Result<Vec<_>>>()?;
459
460 // Transform FROM clause
461 if let Some(mut from) = select.from.take() {
462 from.expressions = from
463 .expressions
464 .into_iter()
465 .map(|e| transform_recursive(e, transform_fn))
466 .collect::<Result<Vec<_>>>()?;
467 select.from = Some(from);
468 }
469
470 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
471 select.joins = select
472 .joins
473 .into_iter()
474 .map(|mut join| {
475 join.this = transform_recursive(join.this, transform_fn)?;
476 if let Some(on) = join.on.take() {
477 join.on = Some(transform_recursive(on, transform_fn)?);
478 }
479 // Wrap join in Expression::Join to allow transform_fn to transform it
480 match transform_fn(Expression::Join(Box::new(join)))? {
481 Expression::Join(j) => Ok(*j),
482 _ => Err(crate::error::Error::parse("Join transformation returned non-join expression")),
483 }
484 })
485 .collect::<Result<Vec<_>>>()?;
486
487 // Transform WHERE clause
488 if let Some(mut where_clause) = select.where_clause.take() {
489 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
490 select.where_clause = Some(where_clause);
491 }
492
493 // Transform GROUP BY
494 if let Some(mut group_by) = select.group_by.take() {
495 group_by.expressions = group_by
496 .expressions
497 .into_iter()
498 .map(|e| transform_recursive(e, transform_fn))
499 .collect::<Result<Vec<_>>>()?;
500 select.group_by = Some(group_by);
501 }
502
503 // Transform HAVING
504 if let Some(mut having) = select.having.take() {
505 having.this = transform_recursive(having.this, transform_fn)?;
506 select.having = Some(having);
507 }
508
509 // Transform WITH (CTEs)
510 if let Some(mut with) = select.with.take() {
511 with.ctes = with.ctes.into_iter().map(|mut cte| {
512 let original = cte.this.clone();
513 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
514 cte
515 }).collect();
516 select.with = Some(with);
517 }
518
519 // Transform ORDER BY
520 if let Some(mut order) = select.order_by.take() {
521 order.expressions = order.expressions.into_iter().map(|o| {
522 let mut o = o;
523 let original = o.this.clone();
524 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
525 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
526 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
527 Ok(Expression::Ordered(transformed)) => *transformed,
528 Ok(_) | Err(_) => o,
529 }
530 }).collect();
531 select.order_by = Some(order);
532 }
533
534 // Transform WINDOW clause order_by
535 if let Some(ref mut windows) = select.windows {
536 for nw in windows.iter_mut() {
537 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by).into_iter().map(|o| {
538 let mut o = o;
539 let original = o.this.clone();
540 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
541 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
542 Ok(Expression::Ordered(transformed)) => *transformed,
543 Ok(_) | Err(_) => o,
544 }
545 }).collect();
546 }
547 }
548
549 // Transform QUALIFY
550 if let Some(mut qual) = select.qualify.take() {
551 qual.this = transform_recursive(qual.this, transform_fn)?;
552 select.qualify = Some(qual);
553 }
554
555 Expression::Select(select)
556 }
557 Expression::Function(mut f) => {
558 f.args = f
559 .args
560 .into_iter()
561 .map(|e| transform_recursive(e, transform_fn))
562 .collect::<Result<Vec<_>>>()?;
563 Expression::Function(f)
564 }
565 Expression::AggregateFunction(mut f) => {
566 f.args = f
567 .args
568 .into_iter()
569 .map(|e| transform_recursive(e, transform_fn))
570 .collect::<Result<Vec<_>>>()?;
571 if let Some(filter) = f.filter {
572 f.filter = Some(transform_recursive(filter, transform_fn)?);
573 }
574 Expression::AggregateFunction(f)
575 }
576 Expression::WindowFunction(mut wf) => {
577 wf.this = transform_recursive(wf.this, transform_fn)?;
578 wf.over.partition_by = wf
579 .over
580 .partition_by
581 .into_iter()
582 .map(|e| transform_recursive(e, transform_fn))
583 .collect::<Result<Vec<_>>>()?;
584 // Transform order_by items through Expression::Ordered wrapper
585 wf.over.order_by = wf.over.order_by.into_iter().map(|o| {
586 let mut o = o;
587 o.this = transform_recursive(o.this, transform_fn)?;
588 match transform_fn(Expression::Ordered(Box::new(o)))? {
589 Expression::Ordered(transformed) => Ok(*transformed),
590 _ => Err(crate::error::Error::parse("Ordered transformation returned non-Ordered expression")),
591 }
592 }).collect::<Result<Vec<_>>>()?;
593 Expression::WindowFunction(wf)
594 }
595 Expression::Alias(mut a) => {
596 a.this = transform_recursive(a.this, transform_fn)?;
597 Expression::Alias(a)
598 }
599 Expression::Cast(mut c) => {
600 c.this = transform_recursive(c.this, transform_fn)?;
601 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
602 c.to = transform_data_type_recursive(c.to, transform_fn)?;
603 Expression::Cast(c)
604 }
605 Expression::And(op) => transform_binary!(And, *op),
606 Expression::Or(op) => transform_binary!(Or, *op),
607 Expression::Add(op) => transform_binary!(Add, *op),
608 Expression::Sub(op) => transform_binary!(Sub, *op),
609 Expression::Mul(op) => transform_binary!(Mul, *op),
610 Expression::Div(op) => transform_binary!(Div, *op),
611 Expression::Eq(op) => transform_binary!(Eq, *op),
612 Expression::Lt(op) => transform_binary!(Lt, *op),
613 Expression::Gt(op) => transform_binary!(Gt, *op),
614 Expression::Paren(mut p) => {
615 p.this = transform_recursive(p.this, transform_fn)?;
616 Expression::Paren(p)
617 }
618 Expression::Coalesce(mut f) => {
619 f.expressions = f
620 .expressions
621 .into_iter()
622 .map(|e| transform_recursive(e, transform_fn))
623 .collect::<Result<Vec<_>>>()?;
624 Expression::Coalesce(f)
625 }
626 Expression::IfNull(mut f) => {
627 f.this = transform_recursive(f.this, transform_fn)?;
628 f.expression = transform_recursive(f.expression, transform_fn)?;
629 Expression::IfNull(f)
630 }
631 Expression::Nvl(mut f) => {
632 f.this = transform_recursive(f.this, transform_fn)?;
633 f.expression = transform_recursive(f.expression, transform_fn)?;
634 Expression::Nvl(f)
635 }
636 Expression::In(mut i) => {
637 i.this = transform_recursive(i.this, transform_fn)?;
638 i.expressions = i
639 .expressions
640 .into_iter()
641 .map(|e| transform_recursive(e, transform_fn))
642 .collect::<Result<Vec<_>>>()?;
643 if let Some(query) = i.query {
644 i.query = Some(transform_recursive(query, transform_fn)?);
645 }
646 Expression::In(i)
647 }
648 Expression::Not(mut n) => {
649 n.this = transform_recursive(n.this, transform_fn)?;
650 Expression::Not(n)
651 }
652 Expression::ArraySlice(mut s) => {
653 s.this = transform_recursive(s.this, transform_fn)?;
654 if let Some(start) = s.start {
655 s.start = Some(transform_recursive(start, transform_fn)?);
656 }
657 if let Some(end) = s.end {
658 s.end = Some(transform_recursive(end, transform_fn)?);
659 }
660 Expression::ArraySlice(s)
661 }
662 Expression::Subscript(mut s) => {
663 s.this = transform_recursive(s.this, transform_fn)?;
664 s.index = transform_recursive(s.index, transform_fn)?;
665 Expression::Subscript(s)
666 }
667 Expression::Array(mut a) => {
668 a.expressions = a.expressions.into_iter()
669 .map(|e| transform_recursive(e, transform_fn))
670 .collect::<Result<Vec<_>>>()?;
671 Expression::Array(a)
672 }
673 Expression::Struct(mut s) => {
674 let mut new_fields = Vec::new();
675 for (name, expr) in s.fields {
676 let transformed = transform_recursive(expr, transform_fn)?;
677 new_fields.push((name, transformed));
678 }
679 s.fields = new_fields;
680 Expression::Struct(s)
681 }
682 Expression::NamedArgument(mut na) => {
683 na.value = transform_recursive(na.value, transform_fn)?;
684 Expression::NamedArgument(na)
685 }
686 Expression::MapFunc(mut m) => {
687 m.keys = m.keys.into_iter()
688 .map(|e| transform_recursive(e, transform_fn))
689 .collect::<Result<Vec<_>>>()?;
690 m.values = m.values.into_iter()
691 .map(|e| transform_recursive(e, transform_fn))
692 .collect::<Result<Vec<_>>>()?;
693 Expression::MapFunc(m)
694 }
695 Expression::ArrayFunc(mut a) => {
696 a.expressions = a.expressions.into_iter()
697 .map(|e| transform_recursive(e, transform_fn))
698 .collect::<Result<Vec<_>>>()?;
699 Expression::ArrayFunc(a)
700 }
701 Expression::Lambda(mut l) => {
702 l.body = transform_recursive(l.body, transform_fn)?;
703 Expression::Lambda(l)
704 }
705 Expression::JsonExtract(mut f) => {
706 f.this = transform_recursive(f.this, transform_fn)?;
707 f.path = transform_recursive(f.path, transform_fn)?;
708 Expression::JsonExtract(f)
709 }
710 Expression::JsonExtractScalar(mut f) => {
711 f.this = transform_recursive(f.this, transform_fn)?;
712 f.path = transform_recursive(f.path, transform_fn)?;
713 Expression::JsonExtractScalar(f)
714 }
715
716 // ===== UnaryFunc-based expressions =====
717 // These all have a single `this: Expression` child
718 Expression::Length(mut f) => {
719 f.this = transform_recursive(f.this, transform_fn)?;
720 Expression::Length(f)
721 }
722 Expression::Upper(mut f) => {
723 f.this = transform_recursive(f.this, transform_fn)?;
724 Expression::Upper(f)
725 }
726 Expression::Lower(mut f) => {
727 f.this = transform_recursive(f.this, transform_fn)?;
728 Expression::Lower(f)
729 }
730 Expression::LTrim(mut f) => {
731 f.this = transform_recursive(f.this, transform_fn)?;
732 Expression::LTrim(f)
733 }
734 Expression::RTrim(mut f) => {
735 f.this = transform_recursive(f.this, transform_fn)?;
736 Expression::RTrim(f)
737 }
738 Expression::Reverse(mut f) => {
739 f.this = transform_recursive(f.this, transform_fn)?;
740 Expression::Reverse(f)
741 }
742 Expression::Abs(mut f) => {
743 f.this = transform_recursive(f.this, transform_fn)?;
744 Expression::Abs(f)
745 }
746 Expression::Ceil(mut f) => {
747 f.this = transform_recursive(f.this, transform_fn)?;
748 Expression::Ceil(f)
749 }
750 Expression::Floor(mut f) => {
751 f.this = transform_recursive(f.this, transform_fn)?;
752 Expression::Floor(f)
753 }
754 Expression::Sign(mut f) => {
755 f.this = transform_recursive(f.this, transform_fn)?;
756 Expression::Sign(f)
757 }
758 Expression::Sqrt(mut f) => {
759 f.this = transform_recursive(f.this, transform_fn)?;
760 Expression::Sqrt(f)
761 }
762 Expression::Cbrt(mut f) => {
763 f.this = transform_recursive(f.this, transform_fn)?;
764 Expression::Cbrt(f)
765 }
766 Expression::Ln(mut f) => {
767 f.this = transform_recursive(f.this, transform_fn)?;
768 Expression::Ln(f)
769 }
770 Expression::Exp(mut f) => {
771 f.this = transform_recursive(f.this, transform_fn)?;
772 Expression::Exp(f)
773 }
774 Expression::Date(mut f) => {
775 f.this = transform_recursive(f.this, transform_fn)?;
776 Expression::Date(f)
777 }
778 Expression::Stddev(mut f) => {
779 f.this = transform_recursive(f.this, transform_fn)?;
780 Expression::Stddev(f)
781 }
782 Expression::Variance(mut f) => {
783 f.this = transform_recursive(f.this, transform_fn)?;
784 Expression::Variance(f)
785 }
786
787 // ===== BinaryFunc-based expressions =====
788 Expression::ModFunc(mut f) => {
789 f.this = transform_recursive(f.this, transform_fn)?;
790 f.expression = transform_recursive(f.expression, transform_fn)?;
791 Expression::ModFunc(f)
792 }
793 Expression::Power(mut f) => {
794 f.this = transform_recursive(f.this, transform_fn)?;
795 f.expression = transform_recursive(f.expression, transform_fn)?;
796 Expression::Power(f)
797 }
798 Expression::MapFromArrays(mut f) => {
799 f.this = transform_recursive(f.this, transform_fn)?;
800 f.expression = transform_recursive(f.expression, transform_fn)?;
801 Expression::MapFromArrays(f)
802 }
803 Expression::ElementAt(mut f) => {
804 f.this = transform_recursive(f.this, transform_fn)?;
805 f.expression = transform_recursive(f.expression, transform_fn)?;
806 Expression::ElementAt(f)
807 }
808 Expression::MapContainsKey(mut f) => {
809 f.this = transform_recursive(f.this, transform_fn)?;
810 f.expression = transform_recursive(f.expression, transform_fn)?;
811 Expression::MapContainsKey(f)
812 }
813 Expression::Left(mut f) => {
814 f.this = transform_recursive(f.this, transform_fn)?;
815 f.length = transform_recursive(f.length, transform_fn)?;
816 Expression::Left(f)
817 }
818 Expression::Right(mut f) => {
819 f.this = transform_recursive(f.this, transform_fn)?;
820 f.length = transform_recursive(f.length, transform_fn)?;
821 Expression::Right(f)
822 }
823 Expression::Repeat(mut f) => {
824 f.this = transform_recursive(f.this, transform_fn)?;
825 f.times = transform_recursive(f.times, transform_fn)?;
826 Expression::Repeat(f)
827 }
828
829 // ===== Complex function expressions =====
830 Expression::Substring(mut f) => {
831 f.this = transform_recursive(f.this, transform_fn)?;
832 f.start = transform_recursive(f.start, transform_fn)?;
833 if let Some(len) = f.length {
834 f.length = Some(transform_recursive(len, transform_fn)?);
835 }
836 Expression::Substring(f)
837 }
838 Expression::Replace(mut f) => {
839 f.this = transform_recursive(f.this, transform_fn)?;
840 f.old = transform_recursive(f.old, transform_fn)?;
841 f.new = transform_recursive(f.new, transform_fn)?;
842 Expression::Replace(f)
843 }
844 Expression::ConcatWs(mut f) => {
845 f.separator = transform_recursive(f.separator, transform_fn)?;
846 f.expressions = f.expressions.into_iter()
847 .map(|e| transform_recursive(e, transform_fn))
848 .collect::<Result<Vec<_>>>()?;
849 Expression::ConcatWs(f)
850 }
851 Expression::Trim(mut f) => {
852 f.this = transform_recursive(f.this, transform_fn)?;
853 if let Some(chars) = f.characters {
854 f.characters = Some(transform_recursive(chars, transform_fn)?);
855 }
856 Expression::Trim(f)
857 }
858 Expression::Split(mut f) => {
859 f.this = transform_recursive(f.this, transform_fn)?;
860 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
861 Expression::Split(f)
862 }
863 Expression::Lpad(mut f) => {
864 f.this = transform_recursive(f.this, transform_fn)?;
865 f.length = transform_recursive(f.length, transform_fn)?;
866 if let Some(fill) = f.fill {
867 f.fill = Some(transform_recursive(fill, transform_fn)?);
868 }
869 Expression::Lpad(f)
870 }
871 Expression::Rpad(mut f) => {
872 f.this = transform_recursive(f.this, transform_fn)?;
873 f.length = transform_recursive(f.length, transform_fn)?;
874 if let Some(fill) = f.fill {
875 f.fill = Some(transform_recursive(fill, transform_fn)?);
876 }
877 Expression::Rpad(f)
878 }
879
880 // ===== Conditional expressions =====
881 Expression::Case(mut c) => {
882 if let Some(operand) = c.operand {
883 c.operand = Some(transform_recursive(operand, transform_fn)?);
884 }
885 c.whens = c.whens.into_iter().map(|(cond, then)| {
886 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
887 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
888 (new_cond, new_then)
889 }).collect();
890 if let Some(else_expr) = c.else_ {
891 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
892 }
893 Expression::Case(c)
894 }
895 Expression::IfFunc(mut f) => {
896 f.condition = transform_recursive(f.condition, transform_fn)?;
897 f.true_value = transform_recursive(f.true_value, transform_fn)?;
898 if let Some(false_val) = f.false_value {
899 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
900 }
901 Expression::IfFunc(f)
902 }
903
904 // ===== Date/Time expressions =====
905 Expression::DateAdd(mut f) => {
906 f.this = transform_recursive(f.this, transform_fn)?;
907 f.interval = transform_recursive(f.interval, transform_fn)?;
908 Expression::DateAdd(f)
909 }
910 Expression::DateSub(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 f.interval = transform_recursive(f.interval, transform_fn)?;
913 Expression::DateSub(f)
914 }
915 Expression::DateDiff(mut f) => {
916 f.this = transform_recursive(f.this, transform_fn)?;
917 f.expression = transform_recursive(f.expression, transform_fn)?;
918 Expression::DateDiff(f)
919 }
920 Expression::DateTrunc(mut f) => {
921 f.this = transform_recursive(f.this, transform_fn)?;
922 Expression::DateTrunc(f)
923 }
924 Expression::Extract(mut f) => {
925 f.this = transform_recursive(f.this, transform_fn)?;
926 Expression::Extract(f)
927 }
928
929 // ===== JSON expressions =====
930 Expression::JsonObject(mut f) => {
931 f.pairs = f.pairs.into_iter().map(|(k, v)| {
932 let new_k = transform_recursive(k, transform_fn)?;
933 let new_v = transform_recursive(v, transform_fn)?;
934 Ok((new_k, new_v))
935 }).collect::<Result<Vec<_>>>()?;
936 Expression::JsonObject(f)
937 }
938
939 // ===== Subquery expressions =====
940 Expression::Subquery(mut s) => {
941 s.this = transform_recursive(s.this, transform_fn)?;
942 Expression::Subquery(s)
943 }
944 Expression::Exists(mut e) => {
945 e.this = transform_recursive(e.this, transform_fn)?;
946 Expression::Exists(e)
947 }
948
949 // ===== Set operations =====
950 Expression::Union(mut u) => {
951 u.left = transform_recursive(u.left, transform_fn)?;
952 u.right = transform_recursive(u.right, transform_fn)?;
953 Expression::Union(u)
954 }
955 Expression::Intersect(mut i) => {
956 i.left = transform_recursive(i.left, transform_fn)?;
957 i.right = transform_recursive(i.right, transform_fn)?;
958 Expression::Intersect(i)
959 }
960 Expression::Except(mut e) => {
961 e.left = transform_recursive(e.left, transform_fn)?;
962 e.right = transform_recursive(e.right, transform_fn)?;
963 Expression::Except(e)
964 }
965
966 // ===== DML expressions =====
967 Expression::Insert(mut ins) => {
968 // Transform VALUES clause expressions
969 let mut new_values = Vec::new();
970 for row in ins.values {
971 let mut new_row = Vec::new();
972 for e in row {
973 new_row.push(transform_recursive(e, transform_fn)?);
974 }
975 new_values.push(new_row);
976 }
977 ins.values = new_values;
978
979 // Transform query (for INSERT ... SELECT)
980 if let Some(query) = ins.query {
981 ins.query = Some(transform_recursive(query, transform_fn)?);
982 }
983
984 // Transform RETURNING clause
985 let mut new_returning = Vec::new();
986 for e in ins.returning {
987 new_returning.push(transform_recursive(e, transform_fn)?);
988 }
989 ins.returning = new_returning;
990
991 // Transform ON CONFLICT clause
992 if let Some(on_conflict) = ins.on_conflict {
993 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
994 }
995
996 Expression::Insert(ins)
997 }
998 Expression::Update(mut upd) => {
999 upd.set = upd.set.into_iter().map(|(id, val)| {
1000 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1001 (id, new_val)
1002 }).collect();
1003 if let Some(mut where_clause) = upd.where_clause.take() {
1004 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1005 upd.where_clause = Some(where_clause);
1006 }
1007 Expression::Update(upd)
1008 }
1009 Expression::Delete(mut del) => {
1010 if let Some(mut where_clause) = del.where_clause.take() {
1011 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1012 del.where_clause = Some(where_clause);
1013 }
1014 Expression::Delete(del)
1015 }
1016
1017 // ===== CTE expressions =====
1018 Expression::With(mut w) => {
1019 w.ctes = w.ctes.into_iter().map(|mut cte| {
1020 let original = cte.this.clone();
1021 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1022 cte
1023 }).collect();
1024 Expression::With(w)
1025 }
1026 Expression::Cte(mut c) => {
1027 c.this = transform_recursive(c.this, transform_fn)?;
1028 Expression::Cte(c)
1029 }
1030
1031 // ===== Order expressions =====
1032 Expression::Ordered(mut o) => {
1033 o.this = transform_recursive(o.this, transform_fn)?;
1034 Expression::Ordered(o)
1035 }
1036
1037 // ===== Negation =====
1038 Expression::Neg(mut n) => {
1039 n.this = transform_recursive(n.this, transform_fn)?;
1040 Expression::Neg(n)
1041 }
1042
1043 // ===== Between =====
1044 Expression::Between(mut b) => {
1045 b.this = transform_recursive(b.this, transform_fn)?;
1046 b.low = transform_recursive(b.low, transform_fn)?;
1047 b.high = transform_recursive(b.high, transform_fn)?;
1048 Expression::Between(b)
1049 }
1050
1051 // ===== Like expressions =====
1052 Expression::Like(mut l) => {
1053 l.left = transform_recursive(l.left, transform_fn)?;
1054 l.right = transform_recursive(l.right, transform_fn)?;
1055 Expression::Like(l)
1056 }
1057 Expression::ILike(mut l) => {
1058 l.left = transform_recursive(l.left, transform_fn)?;
1059 l.right = transform_recursive(l.right, transform_fn)?;
1060 Expression::ILike(l)
1061 }
1062
1063 // ===== Additional binary ops not covered by macro =====
1064 Expression::Neq(op) => transform_binary!(Neq, *op),
1065 Expression::Lte(op) => transform_binary!(Lte, *op),
1066 Expression::Gte(op) => transform_binary!(Gte, *op),
1067 Expression::Mod(op) => transform_binary!(Mod, *op),
1068 Expression::Concat(op) => transform_binary!(Concat, *op),
1069 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1070 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1071 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1072 Expression::Is(op) => transform_binary!(Is, *op),
1073
1074 // ===== TryCast / SafeCast =====
1075 Expression::TryCast(mut c) => {
1076 c.this = transform_recursive(c.this, transform_fn)?;
1077 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1078 Expression::TryCast(c)
1079 }
1080 Expression::SafeCast(mut c) => {
1081 c.this = transform_recursive(c.this, transform_fn)?;
1082 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1083 Expression::SafeCast(c)
1084 }
1085
1086 // ===== Misc =====
1087 Expression::Unnest(mut f) => {
1088 f.this = transform_recursive(f.this, transform_fn)?;
1089 f.expressions = f.expressions.into_iter()
1090 .map(|e| transform_recursive(e, transform_fn))
1091 .collect::<Result<Vec<_>>>()?;
1092 Expression::Unnest(f)
1093 }
1094 Expression::Explode(mut f) => {
1095 f.this = transform_recursive(f.this, transform_fn)?;
1096 Expression::Explode(f)
1097 }
1098 Expression::GroupConcat(mut f) => {
1099 f.this = transform_recursive(f.this, transform_fn)?;
1100 Expression::GroupConcat(f)
1101 }
1102 Expression::StringAgg(mut f) => {
1103 f.this = transform_recursive(f.this, transform_fn)?;
1104 Expression::StringAgg(f)
1105 }
1106 Expression::ListAgg(mut f) => {
1107 f.this = transform_recursive(f.this, transform_fn)?;
1108 Expression::ListAgg(f)
1109 }
1110 Expression::ArrayAgg(mut f) => {
1111 f.this = transform_recursive(f.this, transform_fn)?;
1112 Expression::ArrayAgg(f)
1113 }
1114 Expression::ParseJson(mut f) => {
1115 f.this = transform_recursive(f.this, transform_fn)?;
1116 Expression::ParseJson(f)
1117 }
1118 Expression::ToJson(mut f) => {
1119 f.this = transform_recursive(f.this, transform_fn)?;
1120 Expression::ToJson(f)
1121 }
1122 Expression::JSONExtract(mut e) => {
1123 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1124 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1125 Expression::JSONExtract(e)
1126 }
1127 Expression::JSONExtractScalar(mut e) => {
1128 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1129 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1130 Expression::JSONExtractScalar(e)
1131 }
1132
1133 // StrToTime: recurse into this
1134 Expression::StrToTime(mut e) => {
1135 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1136 Expression::StrToTime(e)
1137 }
1138
1139 // UnixToTime: recurse into this
1140 Expression::UnixToTime(mut e) => {
1141 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1142 Expression::UnixToTime(e)
1143 }
1144
1145 // CreateTable: recurse into column defaults, on_update expressions, and data types
1146 Expression::CreateTable(mut ct) => {
1147 for col in &mut ct.columns {
1148 if let Some(default_expr) = col.default.take() {
1149 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1150 }
1151 if let Some(on_update_expr) = col.on_update.take() {
1152 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1153 }
1154 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1155 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1156 // contexts and may not produce correct results for DDL column definitions.
1157 // The DDL type mappings would need dedicated handling per source/target pair.
1158 }
1159 if let Some(as_select) = ct.as_select.take() {
1160 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1161 }
1162 Expression::CreateTable(ct)
1163 }
1164
1165 // CreateProcedure: recurse into body expressions
1166 Expression::CreateProcedure(mut cp) => {
1167 if let Some(body) = cp.body.take() {
1168 cp.body = Some(match body {
1169 FunctionBody::Expression(expr) => {
1170 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1171 }
1172 FunctionBody::Return(expr) => {
1173 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1174 }
1175 FunctionBody::Statements(stmts) => {
1176 let transformed_stmts = stmts
1177 .into_iter()
1178 .map(|s| transform_recursive(s, transform_fn))
1179 .collect::<Result<Vec<_>>>()?;
1180 FunctionBody::Statements(transformed_stmts)
1181 }
1182 other => other,
1183 });
1184 }
1185 Expression::CreateProcedure(cp)
1186 }
1187
1188 // CreateFunction: recurse into body expressions
1189 Expression::CreateFunction(mut cf) => {
1190 if let Some(body) = cf.body.take() {
1191 cf.body = Some(match body {
1192 FunctionBody::Expression(expr) => {
1193 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1194 }
1195 FunctionBody::Return(expr) => {
1196 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1197 }
1198 FunctionBody::Statements(stmts) => {
1199 let transformed_stmts = stmts
1200 .into_iter()
1201 .map(|s| transform_recursive(s, transform_fn))
1202 .collect::<Result<Vec<_>>>()?;
1203 FunctionBody::Statements(transformed_stmts)
1204 }
1205 other => other,
1206 });
1207 }
1208 Expression::CreateFunction(cf)
1209 }
1210
1211 // MemberOf: recurse into left and right operands
1212 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1213 // ArrayContainsAll (@>): recurse into left and right operands
1214 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1215 // ArrayContainedBy (<@): recurse into left and right operands
1216 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1217 // ArrayOverlaps (&&): recurse into left and right operands
1218 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1219 // TsMatch (@@): recurse into left and right operands
1220 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1221 // Adjacent (-|-): recurse into left and right operands
1222 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1223
1224 // Table: recurse into when (HistoricalData) and changes fields
1225 Expression::Table(mut t) => {
1226 if let Some(when) = t.when.take() {
1227 let transformed = transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1228 if let Expression::HistoricalData(hd) = transformed {
1229 t.when = Some(hd);
1230 }
1231 }
1232 if let Some(changes) = t.changes.take() {
1233 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1234 if let Expression::Changes(c) = transformed {
1235 t.changes = Some(c);
1236 }
1237 }
1238 Expression::Table(t)
1239 }
1240
1241 // HistoricalData (Snowflake time travel): recurse into expression
1242 Expression::HistoricalData(mut hd) => {
1243 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1244 Expression::HistoricalData(hd)
1245 }
1246
1247 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1248 Expression::Changes(mut c) => {
1249 if let Some(at_before) = c.at_before.take() {
1250 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1251 }
1252 if let Some(end) = c.end.take() {
1253 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1254 }
1255 Expression::Changes(c)
1256 }
1257
1258 // TableArgument: TABLE(expr) or MODEL(expr)
1259 Expression::TableArgument(mut ta) => {
1260 ta.this = transform_recursive(ta.this, transform_fn)?;
1261 Expression::TableArgument(ta)
1262 }
1263
1264 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1265 Expression::JoinedTable(mut jt) => {
1266 jt.left = transform_recursive(jt.left, transform_fn)?;
1267 for join in &mut jt.joins {
1268 join.this = transform_recursive(std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)), transform_fn)?;
1269 if let Some(on) = join.on.take() {
1270 join.on = Some(transform_recursive(on, transform_fn)?);
1271 }
1272 }
1273 Expression::JoinedTable(jt)
1274 }
1275
1276 // Lateral: LATERAL func() - recurse into the function expression
1277 Expression::Lateral(mut lat) => {
1278 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1279 Expression::Lateral(lat)
1280 }
1281
1282 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1283 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1284 // as a unit together with the WithinGroup wrapper
1285 Expression::WithinGroup(mut wg) => {
1286 wg.order_by = wg.order_by.into_iter().map(|mut o| {
1287 let original = o.this.clone();
1288 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1289 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1290 Ok(Expression::Ordered(transformed)) => *transformed,
1291 Ok(_) | Err(_) => o,
1292 }
1293 }).collect();
1294 Expression::WithinGroup(wg)
1295 }
1296
1297 // Filter: recurse into both the aggregate and the filter condition
1298 Expression::Filter(mut f) => {
1299 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1300 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1301 Expression::Filter(f)
1302 }
1303
1304 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1305 Expression::BitwiseOrAgg(mut f) => {
1306 f.this = transform_recursive(f.this, transform_fn)?;
1307 Expression::BitwiseOrAgg(f)
1308 }
1309 Expression::BitwiseAndAgg(mut f) => {
1310 f.this = transform_recursive(f.this, transform_fn)?;
1311 Expression::BitwiseAndAgg(f)
1312 }
1313 Expression::BitwiseXorAgg(mut f) => {
1314 f.this = transform_recursive(f.this, transform_fn)?;
1315 Expression::BitwiseXorAgg(f)
1316 }
1317
1318 // Pass through leaf nodes unchanged
1319 other => other,
1320 };
1321
1322 // Then apply the transform function
1323 transform_fn(expr)
1324}
1325
1326/// Main entry point for dialect-specific SQL operations.
1327///
1328/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
1329/// transformer for a specific SQL database engine. It is the high-level API through
1330/// which callers parse, generate, transform, and transpile SQL.
1331///
1332/// # Usage
1333///
1334/// ```rust,ignore
1335/// use polyglot_sql::dialects::{Dialect, DialectType};
1336///
1337/// // Parse PostgreSQL SQL into an AST
1338/// let pg = Dialect::get(DialectType::PostgreSQL);
1339/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
1340///
1341/// // Transpile from PostgreSQL to BigQuery
1342/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
1343/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
1344/// ```
1345///
1346/// Obtain an instance via [`Dialect::get`]. The struct is `Send + Sync` safe so it
1347/// can be shared across threads.
pub struct Dialect {
    /// Which database engine this instance is configured for; also threaded into
    /// the parser config by [`Dialect::parse`].
    dialect_type: DialectType,
    /// Dialect-specific lexer used by [`Dialect::parse`].
    tokenizer: Tokenizer,
    /// Base generator configuration (quoting, casing, syntax style) used when
    /// rendering AST nodes back to SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression rewrite applied bottom-up by [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
}
1356
1357impl Dialect {
1358 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1359 ///
1360 /// This is the primary constructor. It initializes the tokenizer, generator config,
1361 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1362 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1363 /// config routing.
1364 pub fn get(dialect_type: DialectType) -> Self {
1365 let (tokenizer_config, generator_config, transformer): (_, _, Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>) = match dialect_type {
1366 DialectType::PostgreSQL => {
1367 let d = PostgresDialect;
1368 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| PostgresDialect.transform_expr(e)))
1369 }
1370 DialectType::MySQL => {
1371 let d = MySQLDialect;
1372 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| MySQLDialect.transform_expr(e)))
1373 }
1374 DialectType::BigQuery => {
1375 let d = BigQueryDialect;
1376 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| BigQueryDialect.transform_expr(e)))
1377 }
1378 DialectType::Snowflake => {
1379 let d = SnowflakeDialect;
1380 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| SnowflakeDialect.transform_expr(e)))
1381 }
1382 DialectType::DuckDB => {
1383 let d = DuckDBDialect;
1384 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DuckDBDialect.transform_expr(e)))
1385 }
1386 DialectType::TSQL => {
1387 let d = TSQLDialect;
1388 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| TSQLDialect.transform_expr(e)))
1389 }
1390 DialectType::Oracle => {
1391 let d = OracleDialect;
1392 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| OracleDialect.transform_expr(e)))
1393 }
1394 DialectType::Hive => {
1395 let d = HiveDialect;
1396 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| HiveDialect.transform_expr(e)))
1397 }
1398 DialectType::Spark => {
1399 let d = SparkDialect;
1400 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| SparkDialect.transform_expr(e)))
1401 }
1402 DialectType::SQLite => {
1403 let d = SQLiteDialect;
1404 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| SQLiteDialect.transform_expr(e)))
1405 }
1406 DialectType::Presto => {
1407 let d = PrestoDialect;
1408 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| PrestoDialect.transform_expr(e)))
1409 }
1410 DialectType::Trino => {
1411 let d = TrinoDialect;
1412 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| TrinoDialect.transform_expr(e)))
1413 }
1414 DialectType::Redshift => {
1415 let d = RedshiftDialect;
1416 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| RedshiftDialect.transform_expr(e)))
1417 }
1418 DialectType::ClickHouse => {
1419 let d = ClickHouseDialect;
1420 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| ClickHouseDialect.transform_expr(e)))
1421 }
1422 DialectType::Databricks => {
1423 let d = DatabricksDialect;
1424 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DatabricksDialect.transform_expr(e)))
1425 }
1426 DialectType::Athena => {
1427 let d = AthenaDialect;
1428 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| AthenaDialect.transform_expr(e)))
1429 }
1430 DialectType::Teradata => {
1431 let d = TeradataDialect;
1432 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| TeradataDialect.transform_expr(e)))
1433 }
1434 DialectType::Doris => {
1435 let d = DorisDialect;
1436 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DorisDialect.transform_expr(e)))
1437 }
1438 DialectType::StarRocks => {
1439 let d = StarRocksDialect;
1440 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| StarRocksDialect.transform_expr(e)))
1441 }
1442 DialectType::Materialize => {
1443 let d = MaterializeDialect;
1444 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| MaterializeDialect.transform_expr(e)))
1445 }
1446 DialectType::RisingWave => {
1447 let d = RisingWaveDialect;
1448 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| RisingWaveDialect.transform_expr(e)))
1449 }
1450 DialectType::SingleStore => {
1451 let d = SingleStoreDialect;
1452 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| SingleStoreDialect.transform_expr(e)))
1453 }
1454 DialectType::CockroachDB => {
1455 let d = CockroachDBDialect;
1456 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| CockroachDBDialect.transform_expr(e)))
1457 }
1458 DialectType::TiDB => {
1459 let d = TiDBDialect;
1460 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| TiDBDialect.transform_expr(e)))
1461 }
1462 DialectType::Druid => {
1463 let d = DruidDialect;
1464 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DruidDialect.transform_expr(e)))
1465 }
1466 DialectType::Solr => {
1467 let d = SolrDialect;
1468 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| SolrDialect.transform_expr(e)))
1469 }
1470 DialectType::Tableau => {
1471 let d = TableauDialect;
1472 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| TableauDialect.transform_expr(e)))
1473 }
1474 DialectType::Dune => {
1475 let d = DuneDialect;
1476 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DuneDialect.transform_expr(e)))
1477 }
1478 DialectType::Fabric => {
1479 let d = FabricDialect;
1480 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| FabricDialect.transform_expr(e)))
1481 }
1482 DialectType::Drill => {
1483 let d = DrillDialect;
1484 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DrillDialect.transform_expr(e)))
1485 }
1486 DialectType::Dremio => {
1487 let d = DremioDialect;
1488 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| DremioDialect.transform_expr(e)))
1489 }
1490 DialectType::Exasol => {
1491 let d = ExasolDialect;
1492 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| ExasolDialect.transform_expr(e)))
1493 }
1494 _ => {
1495 let d = GenericDialect;
1496 (d.tokenizer_config(), d.generator_config(), Box::new(move |e| GenericDialect.transform_expr(e)))
1497 }
1498 };
1499
1500 // Set up expression-specific generator config for hybrid dialects
1501 let generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>> = match dialect_type {
1502 DialectType::Athena => Some(Box::new(|expr| AthenaDialect.generator_config_for_expr(expr))),
1503 _ => None,
1504 };
1505
1506 Self {
1507 dialect_type,
1508 tokenizer: Tokenizer::new(tokenizer_config),
1509 generator_config,
1510 transformer,
1511 generator_config_for_expr,
1512 }
1513 }
1514
    /// Returns the [`DialectType`] this instance was constructed for.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1519
    /// Returns this dialect's base generator configuration.
    ///
    /// Note: for hybrid dialects, expression-specific overrides are resolved
    /// separately via `get_config_for_expr`; this accessor always returns the base.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1524
1525 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1526 ///
1527 /// The input may contain multiple semicolon-separated statements; each one
1528 /// produces a separate element in the returned vector. Tokenization uses
1529 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1530 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1531 let tokens = self.tokenizer.tokenize(sql)?;
1532 let config = crate::parser::ParserConfig {
1533 dialect: Some(self.dialect_type),
1534 ..Default::default()
1535 };
1536 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1537 parser.parse()
1538 }
1539
1540 /// Get the generator config for a specific expression (supports hybrid dialects)
1541 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1542 if let Some(ref config_fn) = self.generator_config_for_expr {
1543 config_fn(expr)
1544 } else {
1545 self.generator_config.clone()
1546 }
1547 }
1548
1549 /// Generates a SQL string from an [`Expression`] AST node.
1550 ///
1551 /// The output uses this dialect's generator configuration for identifier quoting,
1552 /// keyword casing, function name normalization, and syntax style. The result is
1553 /// a single-line (non-pretty) SQL string.
1554 pub fn generate(&self, expr: &Expression) -> Result<String> {
1555 let config = self.get_config_for_expr(expr);
1556 let mut generator = Generator::with_config(config);
1557 generator.generate(expr)
1558 }
1559
1560 /// Generate SQL from an expression with pretty printing enabled
1561 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1562 let mut config = self.get_config_for_expr(expr);
1563 config.pretty = true;
1564 let mut generator = Generator::with_config(config);
1565 generator.generate(expr)
1566 }
1567
1568 /// Generate SQL from an expression with forced identifier quoting (identify=True)
1569 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
1570 let mut config = self.get_config_for_expr(expr);
1571 config.always_quote_identifiers = true;
1572 let mut generator = Generator::with_config(config);
1573 generator.generate(expr)
1574 }
1575
1576 /// Generate SQL from an expression with pretty printing and forced identifier quoting
1577 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
1578 let mut config = self.generator_config.clone();
1579 config.pretty = true;
1580 config.always_quote_identifiers = true;
1581 let mut generator = Generator::with_config(config);
1582 generator.generate(expr)
1583 }
1584
1585 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
1586 ///
1587 /// The transformation proceeds in two phases:
1588 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
1589 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
1590 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
1591 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
1592 ///
1593 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
1594 /// and for identity transforms (normalizing SQL within the same dialect).
1595 pub fn transform(&self, expr: Expression) -> Result<Expression> {
1596 // Apply preprocessing transforms based on dialect
1597 let preprocessed = self.preprocess(expr)?;
1598 // Then apply recursive transformation
1599 transform_recursive(preprocessed, &self.transformer)
1600 }
1601
1602 /// Apply dialect-specific preprocessing transforms
1603 fn preprocess(&self, expr: Expression) -> Result<Expression> {
1604 use crate::transforms;
1605
1606 match self.dialect_type {
1607 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
1608 DialectType::MySQL => {
1609 let expr = transforms::eliminate_qualify(expr)?;
1610 let expr = transforms::eliminate_full_outer_join(expr)?;
1611 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1612 Ok(expr)
1613 }
1614 // PostgreSQL doesn't support QUALIFY
1615 DialectType::PostgreSQL => {
1616 let expr = transforms::eliminate_qualify(expr)?;
1617 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1618 Ok(expr)
1619 }
1620 // BigQuery doesn't support DISTINCT ON or CTE column aliases
1621 DialectType::BigQuery => {
1622 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1623 let expr = transforms::pushdown_cte_column_names(expr)?;
1624 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
1625 Ok(expr)
1626 }
1627 // Snowflake
1628 DialectType::Snowflake => {
1629 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1630 let expr = transforms::eliminate_window_clause(expr)?;
1631 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
1632 Ok(expr)
1633 }
1634 // TSQL doesn't support QUALIFY
1635 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
1636 DialectType::TSQL => {
1637 let expr = transforms::eliminate_qualify(expr)?;
1638 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
1639 let expr = transforms::ensure_bools(expr)?;
1640 Ok(expr)
1641 }
1642 // Spark doesn't support QUALIFY (but Databricks does)
1643 DialectType::Spark => {
1644 let expr = transforms::eliminate_qualify(expr)?;
1645 let expr = transforms::add_auto_table_alias(expr)?;
1646 let expr = transforms::simplify_nested_paren_values(expr)?;
1647 Ok(expr)
1648 }
1649 // Databricks supports QUALIFY natively
1650 DialectType::Databricks => {
1651 let expr = transforms::add_auto_table_alias(expr)?;
1652 let expr = transforms::simplify_nested_paren_values(expr)?;
1653 Ok(expr)
1654 }
1655 // Hive doesn't support QUALIFY
1656 DialectType::Hive => {
1657 let expr = transforms::eliminate_qualify(expr)?;
1658 Ok(expr)
1659 }
1660 // SQLite doesn't support QUALIFY
1661 DialectType::SQLite => {
1662 let expr = transforms::eliminate_qualify(expr)?;
1663 Ok(expr)
1664 }
1665 // Trino doesn't support QUALIFY
1666 DialectType::Trino => {
1667 let expr = transforms::eliminate_qualify(expr)?;
1668 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
1669 Ok(expr)
1670 }
1671 // Presto doesn't support QUALIFY or WINDOW clause
1672 DialectType::Presto => {
1673 let expr = transforms::eliminate_qualify(expr)?;
1674 let expr = transforms::eliminate_window_clause(expr)?;
1675 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
1676 Ok(expr)
1677 }
1678 // DuckDB supports QUALIFY - no elimination needed
1679 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
1680 DialectType::DuckDB => {
1681 let expr = transforms::expand_posexplode_duckdb(expr)?;
1682 Ok(expr)
1683 }
1684 // Redshift doesn't support QUALIFY or WINDOW clause
1685 DialectType::Redshift => {
1686 let expr = transforms::eliminate_qualify(expr)?;
1687 let expr = transforms::eliminate_window_clause(expr)?;
1688 Ok(expr)
1689 }
1690 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
1691 DialectType::StarRocks => {
1692 let expr = transforms::eliminate_qualify(expr)?;
1693 let expr = transforms::expand_between_in_delete(expr)?;
1694 Ok(expr)
1695 }
1696 // Oracle - no special preprocessing needed
1697 DialectType::Oracle => {
1698 Ok(expr)
1699 }
1700 // Drill - no special preprocessing needed
1701 DialectType::Drill => {
1702 Ok(expr)
1703 }
1704 // Teradata - no special preprocessing needed
1705 DialectType::Teradata => {
1706 Ok(expr)
1707 }
1708 // Other dialects - no preprocessing
1709 _ => Ok(expr),
1710 }
1711 }
1712
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` in this (source) dialect and renders one compact SQL
    /// string per input statement in the `target` dialect. Thin wrapper over
    /// the shared `transpile_to_inner` pipeline with pretty-printing disabled.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
1717
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Identical to `transpile_to` except the target generator emits
    /// multi-line, indented SQL.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
1722
    /// Shared implementation behind `transpile_to` and `transpile_to_pretty`.
    ///
    /// Per parsed statement the pipeline is:
    /// 1. normalize source-dialect constructs via the source `transform`,
    /// 2. apply a sequence of source/target-aware AST rewrites — the order of
    ///    these rewrites is significant (several explicitly depend on running
    ///    before or after a neighbor; see inline comments),
    /// 3. run the target dialect's `transform`,
    /// 4. render SQL text with the target generator (pretty or compact).
    ///
    /// Returns one output string per input statement, or the first error.
    fn transpile_to_inner(&self, sql: &str, target: DialectType, pretty: bool) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);

        expressions
            .into_iter()
            .map(|expr| {
                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                // (A Generic source has no dialect-specific constructs to normalize.)
                let normalized = if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                    self.transform(expr)?
                } else {
                    expr
                };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized = if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                if let (Expression::Function(ref jq), Expression::Function(ref jv)) = (&f.args[0], &f.args[1]) {
                                    if jq.name.eq_ignore_ascii_case("JSON_QUERY") && jv.name.eq_ignore_ascii_case("JSON_VALUE") {
                                        // Unwrap: return just JSON_QUERY(...)
                                        return Ok(f.args[0].clone());
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations (same syntax,
                // different semantics between source and target).
                let normalized = Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena
                        | DialectType::Spark | DialectType::Databricks)
                {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL | DialectType::Redshift)
                    && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena)
                {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized = crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // BigQuery GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::Snowflake)
                {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Finally: target-dialect transform, then render to text.
                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty(&transformed)?
                } else {
                    target_dialect.generate(&transformed)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
1856
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    ///   SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    ///   SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    ///   FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Only the first qualifying join per SELECT is rewritten; if no join
    /// matches (or the interval unit can't be determined) the SELECT is
    /// returned unchanged.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Only SELECT nodes can carry the CROSS JOIN UNNEST(...) pattern.
            let Expression::Select(mut sel) = e else { return Ok(e); };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            // Optional third argument: the step interval.
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval. Two representations exist:
                            // a structured unit spec, or a raw string payload like "1 MONTH".
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            // "1 MONTH" -> take the unit word
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1" — only accept
                                            // known unit names, reject bare numbers.
                                            let upper = parts[0].to_uppercase();
                                            if matches!(upper.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY" | "HOUR" | "MINUTE" | "SECOND") {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only rewrite the first matching join in this SELECT.
                if gda_info.is_some() { break; }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                return Ok(Expression::Select(sel));
            };
            // gda_join_idx is set whenever gda_info is — unwrap cannot panic here.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            // The unit is passed as a bare Column so it renders unquoted.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1 — the redundant-looking +1 -1 mirrors
            // the reference sqlglot output exactly (see function doc above).
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));
            let paren_inner = Expression::Paren(Box::new(Paren { this: minus_one, trailing_comments: vec![] }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![], operator_comments: vec![], trailing_comments: vec![],
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![Expression::Literal(Literal::Number("0".to_string())), outer_plus_one],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The column-alias list matches Snowflake FLATTEN's output columns; the
            // original UNNEST alias takes the position of the VALUE column.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add the LATERAL to the FROM expressions
            // (comma-join form: FROM t, LATERAL FLATTEN(...)).
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column { name: Identifier::new(&unit_str), table: None, join_mark: false, trailing_comments: vec![] }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column { name: Identifier::new(&alias_name), table: None, join_mark: false, trailing_comments: vec![] }),
                        to: DataType::Int { length: None, integer_spelling: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list with the
            // DATEADD computation (top-level refs get re-aliased; see helper).
            let new_exprs: Vec<Expression> = sel.expressions.iter().map(|expr| {
                Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr)
            }).collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2053
2054 /// Helper: replace column references to `alias_name` with dateadd expression
2055 fn replace_column_ref_with_dateadd(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2056 use crate::expressions::*;
2057 match expr {
2058 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2059 // Plain column reference -> DATEADD(...) AS alias_name
2060 Expression::Alias(Box::new(Alias {
2061 this: dateadd.clone(),
2062 alias: Identifier::new(alias_name),
2063 column_aliases: vec![],
2064 pre_alias_comments: vec![],
2065 trailing_comments: vec![],
2066 }))
2067 }
2068 Expression::Alias(a) => {
2069 // Check if the inner expression references the alias
2070 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2071 Expression::Alias(Box::new(Alias {
2072 this: new_this,
2073 alias: a.alias.clone(),
2074 column_aliases: a.column_aliases.clone(),
2075 pre_alias_comments: a.pre_alias_comments.clone(),
2076 trailing_comments: a.trailing_comments.clone(),
2077 }))
2078 }
2079 _ => expr.clone(),
2080 }
2081 }
2082
2083 /// Helper: replace column references in inner expression (not top-level)
2084 fn replace_column_ref_inner(expr: &Expression, alias_name: &str, dateadd: &Expression) -> Expression {
2085 use crate::expressions::*;
2086 match expr {
2087 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2088 dateadd.clone()
2089 }
2090 Expression::Add(op) => {
2091 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2092 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2093 Expression::Add(Box::new(BinaryOp {
2094 left, right,
2095 left_comments: op.left_comments.clone(),
2096 operator_comments: op.operator_comments.clone(),
2097 trailing_comments: op.trailing_comments.clone(),
2098 }))
2099 }
2100 Expression::Sub(op) => {
2101 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2102 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2103 Expression::Sub(Box::new(BinaryOp {
2104 left, right,
2105 left_comments: op.left_comments.clone(),
2106 operator_comments: op.operator_comments.clone(),
2107 trailing_comments: op.trailing_comments.clone(),
2108 }))
2109 }
2110 Expression::Mul(op) => {
2111 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2112 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2113 Expression::Mul(Box::new(BinaryOp {
2114 left, right,
2115 left_comments: op.left_comments.clone(),
2116 operator_comments: op.operator_comments.clone(),
2117 trailing_comments: op.trailing_comments.clone(),
2118 }))
2119 }
2120 _ => expr.clone(),
2121 }
2122 }
2123
2124 fn normalize_snowflake_pretty(mut sql: String) -> String {
2125 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
2126 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
2127 {
2128 sql = sql.replace(
2129 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
2130 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
2131 );
2132
2133 sql = sql.replace(
2134 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
2135 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
2136 );
2137
2138 sql = sql.replace(
2139 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
2140 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
2141 );
2142 }
2143
2144 sql
2145 }
2146
2147 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
2148 /// This handles cases where the same syntax has different semantics across dialects.
2149 fn cross_dialect_normalize(expr: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
2150 use crate::expressions::{AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc, Function, Identifier, IsNull, Literal, Null, Paren};
2151
2152 // Helper to tag which kind of transform to apply
2153 #[derive(Debug)]
2154 enum Action {
2155 None,
2156 GreatestLeastNull,
2157 ArrayGenerateRange,
2158 Div0TypedDivision,
2159 ArrayAggCollectList,
2160 ArrayAggWithinGroupFilter,
2161 ArrayAggFilter,
2162 CastTimestampToDatetime,
2163 DateTruncWrapCast,
2164 ToDateToCast,
2165 ConvertTimezoneToExpr,
2166 SetToVariable,
2167 RegexpReplaceSnowflakeToDuckDB,
2168 BigQueryFunctionNormalize,
2169 BigQuerySafeDivide,
2170 BigQueryCastType,
2171 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
2172 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
2173 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
2174 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
2175 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
2176 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
2177 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
2178 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
2179 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
2180 EpochConvert, // Expression::Epoch -> target-specific epoch function
2181 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
2182 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
2183 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
2184 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
2185 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
2186 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
2187 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
2188 TempTableHash, // TSQL #table -> temp table normalization
2189 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
2190 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
2191 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
2192 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
2193 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
2194 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
2195 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2196 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
2197 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
2198 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
2199 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
2200 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
2201 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
2202 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
2203 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
2204 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
2205 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
2206 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
2207 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
2208 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
2209 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
2210 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
2211 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
2212 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
2213 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
2214 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
2215 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
2216 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
2217 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
2218 DollarParamConvert, // $foo -> @foo for BigQuery
2219 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
2220 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
2221 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
2222 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
2223 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
2224 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
2225 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
2226 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
2227 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
2228 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
2229 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
2230 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
2231 RespectNullsConvert, // RESPECT NULLS window function handling
2232 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
2233 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
2234 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
2235 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
2236 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
2237 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
2238 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
2239 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
2240 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
2241 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
2242 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
2243 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
2244 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
2245 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
2246 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
2247 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
2248 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
2249 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
2250 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
2251 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
2252 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
2253 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
2254 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
2255 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
2256 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
2257 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
2258 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
2259 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
2260 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
2261 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
2262 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
2263 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
2264 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
2265 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
2266 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
2267 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
2268 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
2269 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
2270 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
2271 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
2272 }
2273
2274 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
2275 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
2276 Self::transform_select_into(expr, source, target)
2277 } else {
2278 expr
2279 };
2280
2281 // Strip OFFSET ROWS for non-TSQL/Oracle targets
2282 let expr = if !matches!(target, DialectType::TSQL | DialectType::Oracle | DialectType::Fabric) {
2283 if let Expression::Select(mut select) = expr {
2284 if let Some(ref mut offset) = select.offset {
2285 offset.rows = None;
2286 }
2287 Expression::Select(select)
2288 } else {
2289 expr
2290 }
2291 } else {
2292 expr
2293 };
2294
2295 // Handle CreateTable WITH properties transformation before recursive transforms
2296 let expr = if let Expression::CreateTable(mut ct) = expr {
2297 Self::transform_create_table_properties(&mut ct, source, target);
2298
2299 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
2300 // When the PARTITIONED BY clause contains column definitions, merge them into the
2301 // main column list and adjust the PARTITIONED BY clause for the target dialect.
2302 if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
2303 let mut partition_col_names: Vec<String> = Vec::new();
2304 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
2305 let mut has_col_def_partitions = false;
2306
2307 // Check if any PARTITIONED BY property contains ColumnDef expressions
2308 for prop in &ct.properties {
2309 if let Expression::PartitionedByProperty(ref pbp) = prop {
2310 if let Expression::Tuple(ref tuple) = *pbp.this {
2311 for expr in &tuple.expressions {
2312 if let Expression::ColumnDef(ref cd) = expr {
2313 has_col_def_partitions = true;
2314 partition_col_names.push(cd.name.name.clone());
2315 partition_col_defs.push(*cd.clone());
2316 }
2317 }
2318 }
2319 }
2320 }
2321
2322 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
2323 // Merge partition columns into main column list
2324 for cd in partition_col_defs {
2325 ct.columns.push(cd);
2326 }
2327
2328 // Replace PARTITIONED BY property with column-name-only version
2329 ct.properties.retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
2330
2331 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
2332 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
2333 let array_elements: Vec<String> = partition_col_names.iter()
2334 .map(|n| format!("'{}'", n))
2335 .collect();
2336 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
2337 ct.with_properties.push(("PARTITIONED_BY".to_string(), array_value));
2338 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
2339 // Spark: PARTITIONED BY (y, z) - just column names
2340 let name_exprs: Vec<Expression> = partition_col_names.iter()
2341 .map(|n| Expression::Column(crate::expressions::Column {
2342 name: crate::expressions::Identifier::new(n.clone()),
2343 table: None,
2344 join_mark: false,
2345 trailing_comments: Vec::new(),
2346 }))
2347 .collect();
2348 ct.properties.insert(0, Expression::PartitionedByProperty(Box::new(
2349 crate::expressions::PartitionedByProperty {
2350 this: Box::new(Expression::Tuple(Box::new(crate::expressions::Tuple { expressions: name_exprs }))),
2351 }
2352 )));
2353 }
2354 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
2355 }
2356
2357 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
2358 // are handled by transform_create_table_properties which runs first
2359 }
2360
2361 // Strip LOCATION property for Presto/Trino (not supported)
2362 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
2363 ct.properties.retain(|p| !matches!(p, Expression::LocationProperty(_)));
2364 }
2365
2366 // Strip table-level constraints for Spark/Hive/Databricks
2367 // Keep PRIMARY KEY constraints but strip TSQL-specific modifiers; remove all others
2368 if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
2369 ct.constraints.retain(|c| matches!(c, crate::expressions::TableConstraint::PrimaryKey { .. }));
2370 for constraint in &mut ct.constraints {
2371 if let crate::expressions::TableConstraint::PrimaryKey { columns, modifiers, .. } = constraint {
2372 // Strip ASC/DESC from column names
2373 for col in columns.iter_mut() {
2374 if col.name.ends_with(" ASC") {
2375 col.name = col.name[..col.name.len() - 4].to_string();
2376 } else if col.name.ends_with(" DESC") {
2377 col.name = col.name[..col.name.len() - 5].to_string();
2378 }
2379 }
2380 // Strip TSQL-specific modifiers
2381 modifiers.clustered = None;
2382 modifiers.with_options.clear();
2383 modifiers.on_filegroup = None;
2384 }
2385 }
2386 }
2387
2388 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
2389 if matches!(target, DialectType::Databricks) {
2390 for col in &mut ct.columns {
2391 if col.auto_increment {
2392 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
2393 col.data_type = crate::expressions::DataType::BigInt { length: None };
2394 }
2395 }
2396 }
2397 }
2398
2399 // Spark/Databricks: INTEGER -> INT in column definitions
2400 // Python sqlglot always outputs INT for Spark/Databricks
2401 if matches!(target, DialectType::Spark | DialectType::Databricks) {
2402 for col in &mut ct.columns {
2403 if let crate::expressions::DataType::Int { integer_spelling, .. } = &mut col.data_type {
2404 *integer_spelling = false;
2405 }
2406 }
2407 }
2408
2409 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
2410 if matches!(target, DialectType::Hive | DialectType::Spark) {
2411 for col in &mut ct.columns {
2412 // If nullable is explicitly true (NULL), change to None (omit it)
2413 if col.nullable == Some(true) {
2414 col.nullable = None;
2415 }
2416 // Also remove from constraints if stored there
2417 col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
2418 }
2419 }
2420
2421 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
2422 if ct.on_property.is_some() && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
2423 ct.on_property = None;
2424 }
2425
2426 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
2427 // Snowflake doesn't support typed arrays in DDL
2428 if matches!(target, DialectType::Snowflake) {
2429 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
2430 if let crate::expressions::DataType::Array { .. } = dt {
2431 *dt = crate::expressions::DataType::Custom { name: "ARRAY".to_string() };
2432 }
2433 }
2434 for col in &mut ct.columns {
2435 strip_array_type_params(&mut col.data_type);
2436 }
2437 }
2438
2439 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
2440 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
2441 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
2442 if matches!(target, DialectType::PostgreSQL) {
2443 for col in &mut ct.columns {
2444 if col.auto_increment && !col.constraint_order.is_empty() {
2445 use crate::expressions::ConstraintType;
2446 let has_explicit_not_null = col.constraint_order.iter().any(|ct| *ct == ConstraintType::NotNull);
2447
2448 if has_explicit_not_null {
2449 // Source had explicit NOT NULL - preserve original order
2450 // Just ensure nullable is set
2451 if col.nullable != Some(false) {
2452 col.nullable = Some(false);
2453 }
2454 } else {
2455 // Source didn't have explicit NOT NULL - build order with
2456 // AutoIncrement + NotNull first, then remaining constraints
2457 let mut new_order = Vec::new();
2458 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
2459 new_order.push(ConstraintType::AutoIncrement);
2460 new_order.push(ConstraintType::NotNull);
2461 // Add remaining constraints in original order (except AutoIncrement)
2462 for ct_type in &col.constraint_order {
2463 if *ct_type != ConstraintType::AutoIncrement {
2464 new_order.push(ct_type.clone());
2465 }
2466 }
2467 col.constraint_order = new_order;
2468 col.nullable = Some(false);
2469 }
2470 }
2471 }
2472 }
2473
2474 Expression::CreateTable(ct)
2475 } else {
2476 expr
2477 };
2478
2479 // Handle CreateView column stripping for Presto/Trino target
2480 let expr = if let Expression::CreateView(mut cv) = expr {
2481 // Presto/Trino: drop column list when view has a SELECT body
2482 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty() {
2483 if !matches!(&cv.query, Expression::Null(_)) {
2484 cv.columns.clear();
2485 }
2486 }
2487 Expression::CreateView(cv)
2488 } else {
2489 expr
2490 };
2491
2492 transform_recursive(expr, &|e| {
2493 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
2494 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
2495 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
2496 if let Expression::Cast(ref c) = e {
2497 // Check if this is a CAST of an array to a struct array type
2498 let is_struct_array_cast = matches!(&c.to, crate::expressions::DataType::Array { .. });
2499 if is_struct_array_cast {
2500 let has_auto_named_structs = match &c.this {
2501 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
2502 if let Expression::Struct(s) = elem {
2503 s.fields.iter().all(|(name, _)| {
2504 name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
2505 })
2506 } else { false }
2507 }),
2508 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
2509 if let Expression::Struct(s) = elem {
2510 s.fields.iter().all(|(name, _)| {
2511 name.as_ref().map_or(true, |n| n.starts_with('_') && n[1..].parse::<usize>().is_ok())
2512 })
2513 } else { false }
2514 }),
2515 _ => false,
2516 };
2517 if has_auto_named_structs {
2518 let convert_struct_to_row = |elem: Expression| -> Expression {
2519 if let Expression::Struct(s) = elem {
2520 let row_args: Vec<Expression> = s.fields.into_iter().map(|(_, v)| v).collect();
2521 Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)))
2522 } else {
2523 elem
2524 }
2525 };
2526 let mut c_clone = c.as_ref().clone();
2527 match &mut c_clone.this {
2528 Expression::Array(arr) => {
2529 arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
2530 }
2531 Expression::ArrayFunc(arr) => {
2532 arr.expressions = arr.expressions.drain(..).map(convert_struct_to_row).collect();
2533 }
2534 _ => {}
2535 }
2536 return Ok(Expression::Cast(Box::new(c_clone)));
2537 }
2538 }
2539 }
2540 }
2541
2542 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
2543 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
2544 if let Expression::Select(ref sel) = e {
2545 if sel.kind.as_deref() == Some("STRUCT") {
2546 let mut fields = Vec::new();
2547 for expr in &sel.expressions {
2548 match expr {
2549 Expression::Alias(a) => {
2550 fields.push((Some(a.alias.name.clone()), a.this.clone()));
2551 }
2552 Expression::Column(c) => {
2553 fields.push((Some(c.name.name.clone()), expr.clone()));
2554 }
2555 _ => {
2556 fields.push((None, expr.clone()));
2557 }
2558 }
2559 }
2560 let struct_lit = Expression::Struct(Box::new(crate::expressions::Struct { fields }));
2561 let mut new_select = sel.as_ref().clone();
2562 new_select.kind = None;
2563 new_select.expressions = vec![struct_lit];
2564 return Ok(Expression::Select(Box::new(new_select)));
2565 }
2566 }
2567 }
2568
2569 // Convert @variable -> ${variable} for Spark/Hive/Databricks
2570 if matches!(source, DialectType::TSQL | DialectType::Fabric)
2571 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
2572 {
2573 if let Expression::Parameter(ref p) = e {
2574 if p.style == crate::expressions::ParameterStyle::At {
2575 if let Some(ref name) = p.name {
2576 return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
2577 name: Some(name.clone()),
2578 index: p.index,
2579 style: crate::expressions::ParameterStyle::DollarBrace,
2580 quoted: p.quoted,
2581 expression: None,
2582 })));
2583 }
2584 }
2585 }
2586 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
2587 if let Expression::Column(ref col) = e {
2588 if col.name.name.starts_with('@') && col.table.is_none() {
2589 let var_name = col.name.name.trim_start_matches('@').to_string();
2590 return Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
2591 name: Some(var_name),
2592 index: None,
2593 style: crate::expressions::ParameterStyle::DollarBrace,
2594 quoted: false,
2595 expression: None,
2596 })));
2597 }
2598 }
2599 }
2600
2601 // Convert @variable -> variable in SET statements for Spark/Databricks
2602 if matches!(source, DialectType::TSQL | DialectType::Fabric)
2603 && matches!(target, DialectType::Spark | DialectType::Databricks)
2604 {
2605 if let Expression::SetStatement(ref s) = e {
2606 let mut new_items = s.items.clone();
2607 let mut changed = false;
2608 for item in &mut new_items {
2609 // Strip @ from the SET name (Parameter style)
2610 if let Expression::Parameter(ref p) = item.name {
2611 if p.style == crate::expressions::ParameterStyle::At {
2612 if let Some(ref name) = p.name {
2613 item.name = Expression::Identifier(Identifier::new(name));
2614 changed = true;
2615 }
2616 }
2617 }
2618 // Strip @ from the SET name (Identifier style - SET parser)
2619 if let Expression::Identifier(ref id) = item.name {
2620 if id.name.starts_with('@') {
2621 let var_name = id.name.trim_start_matches('@').to_string();
2622 item.name = Expression::Identifier(Identifier::new(&var_name));
2623 changed = true;
2624 }
2625 }
2626 // Strip @ from the SET name (Column style - alternative parsing)
2627 if let Expression::Column(ref col) = item.name {
2628 if col.name.name.starts_with('@') && col.table.is_none() {
2629 let var_name = col.name.name.trim_start_matches('@').to_string();
2630 item.name = Expression::Identifier(Identifier::new(&var_name));
2631 changed = true;
2632 }
2633 }
2634 }
2635 if changed {
2636 let mut new_set = (**s).clone();
2637 new_set.items = new_items;
2638 return Ok(Expression::SetStatement(Box::new(new_set)));
2639 }
2640 }
2641 }
2642
2643 // Strip NOLOCK hint for non-TSQL targets
2644 if matches!(source, DialectType::TSQL | DialectType::Fabric)
2645 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
2646 {
2647 if let Expression::Table(ref tr) = e {
2648 if !tr.hints.is_empty() {
2649 let mut new_tr = tr.clone();
2650 new_tr.hints.clear();
2651 return Ok(Expression::Table(new_tr));
2652 }
2653 }
2654 }
2655
2656 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
2657 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
2658 if matches!(target, DialectType::Snowflake) {
2659 if let Expression::IsTrue(ref itf) = e {
2660 if let Expression::Boolean(ref b) = itf.this {
2661 if !itf.not {
2662 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
2663 } else {
2664 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
2665 }
2666 }
2667 }
2668 if let Expression::IsFalse(ref itf) = e {
2669 if let Expression::Boolean(ref b) = itf.this {
2670 if !itf.not {
2671 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: !b.value }));
2672 } else {
2673 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: b.value }));
2674 }
2675 }
2676 }
2677 }
2678
2679 // BigQuery: split dotted backtick identifiers in table names
2680 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
2681 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
2682 if let Expression::CreateTable(ref ct) = e {
2683 let mut changed = false;
2684 let mut new_ct = ct.clone();
2685 // Split the table name
2686 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
2687 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
2688 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
2689 let was_quoted = ct.name.name.quoted;
2690 let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
2691 if parts.len() == 3 {
2692 new_ct.name.catalog = Some(mk_id(parts[0]));
2693 new_ct.name.schema = Some(mk_id(parts[1]));
2694 new_ct.name.name = mk_id(parts[2]);
2695 changed = true;
2696 } else if parts.len() == 2 {
2697 new_ct.name.schema = Some(mk_id(parts[0]));
2698 new_ct.name.name = mk_id(parts[1]);
2699 changed = true;
2700 }
2701 }
2702 // Split the clone source name
2703 if let Some(ref clone_src) = ct.clone_source {
2704 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
2705 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
2706 let was_quoted = clone_src.name.quoted;
2707 let mk_id = |s: &str| if was_quoted { Identifier::quoted(s) } else { Identifier::new(s) };
2708 let mut new_src = clone_src.clone();
2709 if parts.len() == 3 {
2710 new_src.catalog = Some(mk_id(parts[0]));
2711 new_src.schema = Some(mk_id(parts[1]));
2712 new_src.name = mk_id(parts[2]);
2713 new_ct.clone_source = Some(new_src);
2714 changed = true;
2715 } else if parts.len() == 2 {
2716 new_src.schema = Some(mk_id(parts[0]));
2717 new_src.name = mk_id(parts[1]);
2718 new_ct.clone_source = Some(new_src);
2719 changed = true;
2720 }
2721 }
2722 }
2723 if changed {
2724 return Ok(Expression::CreateTable(new_ct));
2725 }
2726 }
2727 }
2728
2729 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
2730 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
2731 if matches!(source, DialectType::BigQuery)
2732 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena)
2733 {
2734 if let Expression::Subscript(ref sub) = e {
2735 let (new_index, is_safe) = match &sub.index {
2736 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
2737 Expression::Literal(Literal::Number(n)) => {
2738 if let Ok(val) = n.parse::<i64>() {
2739 (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
2740 } else {
2741 (None, false)
2742 }
2743 }
2744 // OFFSET(n) -> n+1 (0-based)
2745 Expression::Function(ref f) if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 => {
2746 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
2747 if let Ok(val) = n.parse::<i64>() {
2748 (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), false)
2749 } else {
2750 (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
2751 }
2752 } else {
2753 (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), false)
2754 }
2755 }
2756 // ORDINAL(n) -> n (already 1-based)
2757 Expression::Function(ref f) if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 => {
2758 (Some(f.args[0].clone()), false)
2759 }
2760 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
2761 Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 => {
2762 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
2763 if let Ok(val) = n.parse::<i64>() {
2764 (Some(Expression::Literal(Literal::Number((val + 1).to_string()))), true)
2765 } else {
2766 (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
2767 }
2768 } else {
2769 (Some(Expression::Add(Box::new(crate::expressions::BinaryOp::new(f.args[0].clone(), Expression::number(1))))), true)
2770 }
2771 }
2772 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
2773 Expression::Function(ref f) if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 => {
2774 (Some(f.args[0].clone()), true)
2775 }
2776 _ => (None, false),
2777 };
2778 if let Some(idx) = new_index {
2779 if is_safe && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
2780 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
2781 return Ok(Expression::Function(Box::new(Function::new(
2782 "ELEMENT_AT".to_string(), vec![sub.this.clone(), idx],
2783 ))));
2784 } else {
2785 // DuckDB or non-safe: just use subscript with converted index
2786 return Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
2787 this: sub.this.clone(),
2788 index: idx,
2789 })));
2790 }
2791 }
2792 }
2793 }
2794
2795 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
2796 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
2797 if let Expression::Length(ref uf) = e {
2798 let arg = uf.this.clone();
2799 let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
2800 let blob_cast = Expression::Cast(Box::new(Cast {
2801 this: arg.clone(),
2802 to: DataType::VarBinary { length: None },
2803 trailing_comments: vec![],
2804 double_colon_syntax: false,
2805 format: None,
2806 default: None,
2807 }));
2808 let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
2809 let text_cast = Expression::Cast(Box::new(Cast {
2810 this: arg,
2811 to: DataType::Text,
2812 trailing_comments: vec![],
2813 double_colon_syntax: false,
2814 format: None,
2815 default: None,
2816 }));
2817 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc { this: text_cast, original_name: None }));
2818 return Ok(Expression::Case(Box::new(Case {
2819 operand: Some(typeof_func),
2820 whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
2821 else_: Some(length_text),
2822 })));
2823 }
2824 }
2825
2826 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
2827 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
2828 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
2829 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
2830 if let Expression::Alias(ref a) = e {
2831 if matches!(&a.this, Expression::Unnest(_)) {
2832 if a.column_aliases.is_empty() {
2833 // Drop the entire alias, return just the UNNEST expression
2834 return Ok(a.this.clone());
2835 } else {
2836 // Use first column alias as the main alias
2837 let mut new_alias = a.as_ref().clone();
2838 new_alias.alias = a.column_aliases[0].clone();
2839 new_alias.column_aliases.clear();
2840 return Ok(Expression::Alias(Box::new(new_alias)));
2841 }
2842 }
2843 }
2844 }
2845
2846 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
2847 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
2848 if let Expression::In(ref in_expr) = e {
2849 if let Some(ref unnest_inner) = in_expr.unnest {
2850 // Build the function call for the target dialect
2851 let func_expr = if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
2852 // Use EXPLODE for Hive/Spark
2853 Expression::Function(Box::new(Function::new("EXPLODE".to_string(), vec![*unnest_inner.clone()])))
2854 } else {
2855 // Use UNNEST for Presto/Trino/DuckDB/etc.
2856 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
2857 this: *unnest_inner.clone(),
2858 expressions: Vec::new(),
2859 with_ordinality: false,
2860 alias: None,
2861 offset_alias: None,
2862 }))
2863 };
2864
2865 // Wrap in SELECT
2866 let mut inner_select = crate::expressions::Select::new();
2867 inner_select.expressions = vec![func_expr];
2868
2869 let subquery_expr = Expression::Select(Box::new(inner_select));
2870
2871 return Ok(Expression::In(Box::new(crate::expressions::In {
2872 this: in_expr.this.clone(),
2873 expressions: Vec::new(),
2874 query: Some(subquery_expr),
2875 not: in_expr.not,
2876 global: in_expr.global,
2877 unnest: None,
2878 })));
2879 }
2880 }
2881 }
2882
2883 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
2884 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
2885 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
2886 if let Expression::Alias(ref a) = e {
2887 if let Expression::Function(ref f) = a.this {
2888 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && !a.column_aliases.is_empty() {
2889 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
2890 let col_alias = a.column_aliases[0].clone();
2891 let mut inner_select = crate::expressions::Select::new();
2892 inner_select.expressions = vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
2893 Expression::Identifier(Identifier::new("value".to_string())),
2894 col_alias,
2895 )))];
2896 inner_select.from = Some(crate::expressions::From {
2897 expressions: vec![a.this.clone()],
2898 });
2899 let subquery = Expression::Subquery(Box::new(crate::expressions::Subquery {
2900 this: Expression::Select(Box::new(inner_select)),
2901 alias: Some(a.alias.clone()),
2902 column_aliases: Vec::new(),
2903 order_by: None,
2904 limit: None,
2905 offset: None,
2906 lateral: false,
2907 modifiers_inside: false,
2908 trailing_comments: Vec::new(),
2909 distribute_by: None,
2910 sort_by: None,
2911 cluster_by: None,
2912 }));
2913 return Ok(subquery);
2914 }
2915 }
2916 }
2917 }
2918
2919 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
2920 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
2921 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
2922 if matches!(source, DialectType::BigQuery) {
2923 if let Expression::Select(ref s) = e {
2924 if let Some(ref from) = s.from {
2925 if from.expressions.len() >= 2 {
2926 // Collect table names from first expression
2927 let first_tables: Vec<String> = from.expressions.iter().take(1).filter_map(|expr| {
2928 if let Expression::Table(t) = expr {
2929 Some(t.name.name.to_lowercase())
2930 } else {
2931 None
2932 }
2933 }).collect();
2934
2935 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
2936 // or have a dotted name matching a table
2937 let mut needs_rewrite = false;
2938 for expr in from.expressions.iter().skip(1) {
2939 if let Expression::Table(t) = expr {
2940 if let Some(ref schema) = t.schema {
2941 if first_tables.contains(&schema.name.to_lowercase()) {
2942 needs_rewrite = true;
2943 break;
2944 }
2945 }
2946 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
2947 if t.schema.is_none() && t.name.name.contains('.') {
2948 let parts: Vec<&str> = t.name.name.split('.').collect();
2949 if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
2950 needs_rewrite = true;
2951 break;
2952 }
2953 }
2954 }
2955 }
2956
2957 if needs_rewrite {
2958 let mut new_select = s.clone();
2959 let mut new_from_exprs = vec![from.expressions[0].clone()];
2960 let mut new_joins = s.joins.clone();
2961
2962 for expr in from.expressions.iter().skip(1) {
2963 if let Expression::Table(ref t) = expr {
2964 if let Some(ref schema) = t.schema {
2965 if first_tables.contains(&schema.name.to_lowercase()) {
2966 // This is an array path reference, convert to CROSS JOIN UNNEST
2967 let col_expr = Expression::Column(crate::expressions::Column {
2968 name: t.name.clone(),
2969 table: Some(schema.clone()),
2970 join_mark: false,
2971 trailing_comments: vec![],
2972 });
2973 let unnest_expr = Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
2974 this: col_expr,
2975 expressions: Vec::new(),
2976 with_ordinality: false,
2977 alias: None,
2978 offset_alias: None,
2979 }));
2980 let join_this = if let Some(ref alias) = t.alias {
2981 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
2982 // Presto: UNNEST(x) AS _t0(results)
2983 Expression::Alias(Box::new(crate::expressions::Alias {
2984 this: unnest_expr,
2985 alias: Identifier::new("_t0"),
2986 column_aliases: vec![alias.clone()],
2987 pre_alias_comments: vec![],
2988 trailing_comments: vec![],
2989 }))
2990 } else {
2991 // BigQuery: UNNEST(x) AS results
2992 Expression::Alias(Box::new(crate::expressions::Alias {
2993 this: unnest_expr,
2994 alias: alias.clone(),
2995 column_aliases: vec![],
2996 pre_alias_comments: vec![],
2997 trailing_comments: vec![],
2998 }))
2999 }
3000 } else {
3001 unnest_expr
3002 };
3003 new_joins.push(crate::expressions::Join {
3004 kind: crate::expressions::JoinKind::Cross,
3005 this: join_this,
3006 on: None,
3007 using: Vec::new(),
3008 use_inner_keyword: false,
3009 use_outer_keyword: false,
3010 deferred_condition: false,
3011 join_hint: None,
3012 match_condition: None,
3013 pivots: Vec::new(),
3014 });
3015 } else {
3016 new_from_exprs.push(expr.clone());
3017 }
3018 } else if t.schema.is_none() && t.name.name.contains('.') {
3019 // Dotted name in quoted identifier: `Coordinates.position`
3020 let parts: Vec<&str> = t.name.name.split('.').collect();
3021 if parts.len() >= 2 && first_tables.contains(&parts[0].to_lowercase()) {
3022 let join_this = if matches!(target, DialectType::BigQuery) {
3023 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
3024 Expression::Table(t.clone())
3025 } else {
3026 // Other targets: split into "schema"."name"
3027 let mut new_t = t.clone();
3028 new_t.schema = Some(Identifier::quoted(parts[0]));
3029 new_t.name = Identifier::quoted(parts[1]);
3030 Expression::Table(new_t)
3031 };
3032 new_joins.push(crate::expressions::Join {
3033 kind: crate::expressions::JoinKind::Cross,
3034 this: join_this,
3035 on: None,
3036 using: Vec::new(),
3037 use_inner_keyword: false,
3038 use_outer_keyword: false,
3039 deferred_condition: false,
3040 join_hint: None,
3041 match_condition: None,
3042 pivots: Vec::new(),
3043 });
3044 } else {
3045 new_from_exprs.push(expr.clone());
3046 }
3047 } else {
3048 new_from_exprs.push(expr.clone());
3049 }
3050 } else {
3051 new_from_exprs.push(expr.clone());
3052 }
3053 }
3054
3055 new_select.from = Some(crate::expressions::From {
3056 expressions: new_from_exprs,
3057 ..from.clone()
3058 });
3059 new_select.joins = new_joins;
3060 return Ok(Expression::Select(new_select));
3061 }
3062 }
3063 }
3064 }
3065 }
3066
3067 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
3068 if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
3069 if let Expression::Select(ref s) = e {
3070 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
3071 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
3072 matches!(expr, Expression::Unnest(_))
3073 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
3074 };
3075 let has_unnest_join = s.joins.iter().any(|j| {
3076 j.kind == crate::expressions::JoinKind::Cross && (
3077 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
3078 || is_unnest_or_explode_expr(&j.this)
3079 )
3080 });
3081 if has_unnest_join {
3082 let mut select = s.clone();
3083 let mut new_joins = Vec::new();
3084 for join in select.joins.drain(..) {
3085 if join.kind == crate::expressions::JoinKind::Cross {
3086 // Extract the UNNEST/EXPLODE from the join
3087 let (func_expr, table_alias, col_aliases) = match &join.this {
3088 Expression::Alias(a) => {
3089 let ta = if a.alias.is_empty() { None } else { Some(a.alias.clone()) };
3090 let cas = a.column_aliases.clone();
3091 match &a.this {
3092 Expression::Unnest(u) => {
3093 // Convert UNNEST(x) to EXPLODE(x)
3094 let explode = Expression::Function(Box::new(crate::expressions::Function::new(
3095 "EXPLODE".to_string(),
3096 vec![u.this.clone()],
3097 )));
3098 (Some(explode), ta, cas)
3099 }
3100 Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE") => {
3101 (Some(Expression::Function(f.clone())), ta, cas)
3102 }
3103 _ => (None, None, Vec::new())
3104 }
3105 }
3106 Expression::Unnest(u) => {
3107 let explode = Expression::Function(Box::new(crate::expressions::Function::new(
3108 "EXPLODE".to_string(),
3109 vec![u.this.clone()],
3110 )));
3111 let ta = u.alias.clone();
3112 (Some(explode), ta, Vec::new())
3113 }
3114 _ => (None, None, Vec::new())
3115 };
3116 if let Some(func) = func_expr {
3117 select.lateral_views.push(crate::expressions::LateralView {
3118 this: func,
3119 table_alias,
3120 column_aliases: col_aliases,
3121 outer: false,
3122 });
3123 } else {
3124 new_joins.push(join);
3125 }
3126 } else {
3127 new_joins.push(join);
3128 }
3129 }
3130 select.joins = new_joins;
3131 return Ok(Expression::Select(select));
3132 }
3133 }
3134 }
3135
3136 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
3137 // for BigQuery, Presto/Trino, Snowflake
3138 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
3139 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino | DialectType::Snowflake)
3140 {
3141 if let Expression::Select(ref s) = e {
3142 // Check if any SELECT expressions contain UNNEST
3143 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
3144 let has_unnest_in_select = s.expressions.iter().any(|expr| {
3145 fn contains_unnest(e: &Expression) -> bool {
3146 match e {
3147 Expression::Unnest(_) => true,
3148 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => true,
3149 Expression::Alias(a) => contains_unnest(&a.this),
3150 Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
3151 contains_unnest(&op.left) || contains_unnest(&op.right)
3152 }
3153 _ => false,
3154 }
3155 }
3156 contains_unnest(expr)
3157 });
3158
3159 if has_unnest_in_select {
3160 let rewritten = Self::rewrite_unnest_expansion(s, target);
3161 if let Some(new_select) = rewritten {
3162 return Ok(Expression::Select(Box::new(new_select)));
3163 }
3164 }
3165 }
3166 }
3167
3168 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
3169 // BigQuery '\n' -> PostgreSQL literal newline in string
3170 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL) {
3171 if let Expression::Literal(Literal::String(ref s)) = e {
3172 if s.contains("\\n") || s.contains("\\t") || s.contains("\\r") || s.contains("\\\\") {
3173 let converted = s
3174 .replace("\\n", "\n")
3175 .replace("\\t", "\t")
3176 .replace("\\r", "\r")
3177 .replace("\\\\", "\\");
3178 return Ok(Expression::Literal(Literal::String(converted)));
3179 }
3180 }
3181 }
3182
3183 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
3184 // when source != target (identity tests keep the Literal::Timestamp for native handling)
3185 if source != target {
3186 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
3187 let s = s.clone();
3188 // MySQL: TIMESTAMP handling depends on source dialect
3189 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
3190 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
3191 if matches!(target, DialectType::MySQL) {
3192 if matches!(source, DialectType::BigQuery) {
3193 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
3194 return Ok(Expression::Function(Box::new(Function::new(
3195 "TIMESTAMP".to_string(), vec![Expression::Literal(Literal::String(s))],
3196 ))));
3197 } else {
3198 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
3199 return Ok(Expression::Cast(Box::new(Cast {
3200 this: Expression::Literal(Literal::String(s)),
3201 to: DataType::Custom { name: "DATETIME".to_string() },
3202 trailing_comments: Vec::new(),
3203 double_colon_syntax: false,
3204 format: None,
3205 default: None,
3206 })));
3207 }
3208 }
3209 let dt = match target {
3210 DialectType::BigQuery | DialectType::StarRocks => {
3211 DataType::Custom { name: "DATETIME".to_string() }
3212 }
3213 DialectType::Snowflake => {
3214 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
3215 if matches!(source, DialectType::BigQuery) {
3216 DataType::Custom { name: "TIMESTAMPTZ".to_string() }
3217 } else if matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake) {
3218 DataType::Timestamp { precision: None, timezone: false }
3219 } else {
3220 DataType::Custom { name: "TIMESTAMPNTZ".to_string() }
3221 }
3222 }
3223 DialectType::Spark | DialectType::Databricks => {
3224 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
3225 if matches!(source, DialectType::BigQuery) {
3226 DataType::Timestamp { precision: None, timezone: false }
3227 } else {
3228 DataType::Custom { name: "TIMESTAMP_NTZ".to_string() }
3229 }
3230 }
3231 DialectType::ClickHouse => {
3232 DataType::Custom { name: "Nullable(DateTime)".to_string() }
3233 }
3234 DialectType::TSQL | DialectType::Fabric => {
3235 DataType::Custom { name: "DATETIME2".to_string() }
3236 }
3237 DialectType::DuckDB => {
3238 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
3239 // or when the timestamp string explicitly has timezone info
3240 if matches!(source, DialectType::BigQuery) || Self::timestamp_string_has_timezone(&s) {
3241 DataType::Custom { name: "TIMESTAMPTZ".to_string() }
3242 } else {
3243 DataType::Timestamp { precision: None, timezone: false }
3244 }
3245 }
3246 _ => {
3247 DataType::Timestamp { precision: None, timezone: false }
3248 }
3249 };
3250 return Ok(Expression::Cast(Box::new(Cast {
3251 this: Expression::Literal(Literal::String(s)),
3252 to: dt,
3253 trailing_comments: vec![],
3254 double_colon_syntax: false,
3255 format: None,
3256 default: None,
3257 })));
3258 }
3259 }
3260
3261 // PostgreSQL DELETE requires explicit AS for table aliases
3262 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
3263 if let Expression::Delete(ref del) = e {
3264 if del.alias.is_some() && !del.alias_explicit_as {
3265 let mut new_del = del.clone();
3266 new_del.alias_explicit_as = true;
3267 return Ok(Expression::Delete(new_del));
3268 }
3269 }
3270 }
3271
3272 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
3273 if matches!(target, DialectType::DuckDB) {
3274 if let Expression::CreateDatabase(db) = e {
3275 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
3276 schema.if_not_exists = db.if_not_exists;
3277 return Ok(Expression::CreateSchema(Box::new(schema)));
3278 }
3279 if let Expression::DropDatabase(db) = e {
3280 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
3281 schema.if_exists = db.if_exists;
3282 return Ok(Expression::DropSchema(Box::new(schema)));
3283 }
3284 }
3285
3286 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
3287 if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) {
3288 if let Expression::Cast(ref c) = e {
3289 if let DataType::Custom { ref name } = c.to {
3290 let upper = name.to_uppercase();
3291 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
3292 let inner = &name[9..name.len()-1]; // strip "Nullable(" and ")"
3293 let inner_upper = inner.to_uppercase();
3294 let new_dt = match inner_upper.as_str() {
3295 "DATETIME" | "DATETIME64" => DataType::Timestamp { precision: None, timezone: false },
3296 "DATE" => DataType::Date,
3297 "INT64" | "BIGINT" => DataType::BigInt { length: None },
3298 "INT32" | "INT" | "INTEGER" => DataType::Int { length: None, integer_spelling: false },
3299 "FLOAT64" | "DOUBLE" => DataType::Double { precision: None, scale: None },
3300 "STRING" => DataType::Text,
3301 _ => DataType::Custom { name: inner.to_string() },
3302 };
3303 let mut new_cast = c.clone();
3304 new_cast.to = new_dt;
3305 return Ok(Expression::Cast(new_cast));
3306 }
3307 }
3308 }
3309 }
3310
3311 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
3312 if matches!(target, DialectType::Snowflake) {
3313 if let Expression::ArrayConcatAgg(ref agg) = e {
3314 let mut agg_clone = agg.as_ref().clone();
3315 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
3316 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
3317 let flatten = Expression::Function(Box::new(Function::new(
3318 "ARRAY_FLATTEN".to_string(), vec![array_agg],
3319 )));
3320 return Ok(flatten);
3321 }
3322 }
3323
3324 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
3325 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
3326 if let Expression::ArrayConcatAgg(agg) = e {
3327 let arg = agg.this;
3328 return Ok(Expression::Function(Box::new(Function::new(
3329 "ARRAY_CONCAT_AGG".to_string(), vec![arg],
3330 ))));
3331 }
3332 }
3333
3334 // Determine what action to take by inspecting e immutably
3335 let action = {
3336 let source_propagates_nulls = matches!(source, DialectType::Snowflake | DialectType::BigQuery);
3337 let target_ignores_nulls = matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
3338
3339 match &e {
3340 Expression::Function(f) => {
3341 let name = f.name.to_uppercase();
3342 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
3343 if (name == "DATE_PART" || name == "DATEPART")
3344 && f.args.len() == 2
3345 && matches!(target, DialectType::Snowflake)
3346 && !matches!(source, DialectType::Snowflake)
3347 && matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_)))
3348 {
3349 Action::DatePartUnquote
3350 } else if source_propagates_nulls && target_ignores_nulls
3351 && (name == "GREATEST" || name == "LEAST") && f.args.len() >= 2 {
3352 Action::GreatestLeastNull
3353 } else if matches!(source, DialectType::Snowflake)
3354 && name == "ARRAY_GENERATE_RANGE" && f.args.len() >= 2 {
3355 Action::ArrayGenerateRange
3356 } else if matches!(source, DialectType::Snowflake)
3357 && matches!(target, DialectType::DuckDB)
3358 && name == "DATE_TRUNC" && f.args.len() == 2 {
3359 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
3360 // Logic based on Python sqlglot's input_type_preserved flag:
3361 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
3362 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
3363 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
3364 let unit_str = match &f.args[0] {
3365 Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.to_uppercase()),
3366 _ => None,
3367 };
3368 let is_date_unit = unit_str.as_ref().map_or(false, |u| matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY"));
3369 match &f.args[1] {
3370 Expression::Cast(c) => match &c.to {
3371 DataType::Time { .. } => Action::DateTruncWrapCast,
3372 DataType::Custom { name } if name.eq_ignore_ascii_case("TIMESTAMPTZ") || name.eq_ignore_ascii_case("TIMESTAMPLTZ") => Action::DateTruncWrapCast,
3373 DataType::Timestamp { timezone: true, .. } => Action::DateTruncWrapCast,
3374 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
3375 DataType::Timestamp { timezone: false, .. } if is_date_unit => Action::DateTruncWrapCast,
3376 _ => Action::None,
3377 }
3378 _ => Action::None,
3379 }
3380 } else if matches!(source, DialectType::Snowflake)
3381 && matches!(target, DialectType::DuckDB)
3382 && name == "TO_DATE" && f.args.len() == 1
3383 && !matches!(&f.args[0], Expression::Literal(crate::expressions::Literal::String(_))) {
3384 Action::ToDateToCast
3385 } else if !matches!(source, DialectType::Redshift)
3386 && matches!(target, DialectType::Redshift)
3387 && name == "CONVERT_TIMEZONE"
3388 && (f.args.len() == 2 || f.args.len() == 3) {
3389 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
3390 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
3391 // The Redshift parser adds 'UTC' as default source_tz, but when
3392 // transpiling from other dialects, we should preserve the original form.
3393 Action::ConvertTimezoneToExpr
3394 } else if matches!(source, DialectType::Snowflake)
3395 && matches!(target, DialectType::DuckDB)
3396 && name == "REGEXP_REPLACE"
3397 && f.args.len() == 4
3398 && !matches!(&f.args[3], Expression::Literal(crate::expressions::Literal::String(_))) {
3399 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
3400 Action::RegexpReplaceSnowflakeToDuckDB
3401 } else if name == "_BQ_TO_HEX" {
3402 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
3403 Action::BigQueryToHexBare
3404 } else if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
3405 // BigQuery-specific functions that need to be converted to standard forms
3406 match name.as_str() {
3407 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3408 | "DATE_DIFF"
3409 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
3410 | "DATETIME_ADD" | "DATETIME_SUB"
3411 | "TIME_ADD" | "TIME_SUB"
3412 | "DATE_ADD" | "DATE_SUB"
3413 | "SAFE_DIVIDE"
3414 | "GENERATE_UUID"
3415 | "COUNTIF"
3416 | "EDIT_DISTANCE"
3417 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
3418 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
3419 | "TO_HEX"
3420 | "TO_JSON_STRING"
3421 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
3422 | "DIV"
3423 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
3424 | "LAST_DAY"
3425 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
3426 | "REGEXP_CONTAINS"
3427 | "CONTAINS_SUBSTR"
3428 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
3429 | "SAFE_CAST"
3430 | "GENERATE_DATE_ARRAY"
3431 | "PARSE_DATE" | "PARSE_TIMESTAMP"
3432 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
3433 | "ARRAY_CONCAT"
3434 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
3435 | "INSTR"
3436 | "MD5" | "SHA1" | "SHA256" | "SHA512"
3437 | "GENERATE_UUID()" // just in case
3438 | "REGEXP_EXTRACT_ALL"
3439 | "REGEXP_EXTRACT"
3440 | "INT64"
3441 | "ARRAY_CONCAT_AGG"
3442 | "DATE_DIFF(" // just in case
3443 | "TO_HEX_MD5" // internal
3444 | "MOD"
3445 | "CONCAT"
3446 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
3447 | "STRUCT"
3448 | "ROUND"
3449 | "MAKE_INTERVAL"
3450 | "ARRAY_TO_STRING"
3451 | "PERCENTILE_CONT"
3452 => Action::BigQueryFunctionNormalize,
3453 "ARRAY" if matches!(target, DialectType::Snowflake)
3454 && f.args.len() == 1
3455 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
3456 => Action::BigQueryArraySelectAsStructToSnowflake,
3457 _ => Action::None,
3458 }
3459 } else if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::BigQuery) {
3460 // BigQuery -> BigQuery normalizations
3461 match name.as_str() {
3462 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
3463 | "DATE_DIFF"
3464 | "DATE_ADD"
3465 | "TO_HEX"
3466 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_TIME" | "CURRENT_DATETIME"
3467 | "GENERATE_DATE_ARRAY"
3468 | "INSTR"
3469 | "FORMAT_DATETIME"
3470 | "DATETIME"
3471 | "MAKE_INTERVAL"
3472 => Action::BigQueryFunctionNormalize,
3473 _ => Action::None,
3474 }
3475 } else {
3476 // Generic function normalization for non-BigQuery sources
3477 match name.as_str() {
3478 "ARBITRARY" | "AGGREGATE"
3479 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
3480 | "STRUCT_EXTRACT"
3481 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
3482 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
3483 | "SUBSTRINGINDEX"
3484 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
3485 | "UNICODE"
3486 | "XOR"
3487 | "ARRAY_REVERSE_SORT"
3488 | "ENCODE" | "DECODE"
3489 | "QUANTILE"
3490 | "EPOCH" | "EPOCH_MS"
3491 | "HASHBYTES"
3492 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
3493 | "APPROX_DISTINCT"
3494 | "DATE_PARSE" | "FORMAT_DATETIME"
3495 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
3496 | "RLIKE"
3497 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
3498 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
3499 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
3500 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
3501 | "MAP" | "MAP_FROM_ENTRIES"
3502 | "COLLECT_LIST" | "COLLECT_SET"
3503 | "ISNAN" | "IS_NAN"
3504 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
3505 | "FORMAT_NUMBER"
3506 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
3507 | "ELEMENT_AT"
3508 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
3509 | "SPLIT_PART"
3510 // GENERATE_SERIES: handled separately below
3511 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
3512 | "JSON_QUERY" | "JSON_VALUE"
3513 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
3514 | "ARRAY_SUM"
3515 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
3516 | "CURDATE" | "CURTIME"
3517 | "ARRAY_TO_STRING"
3518 | "ARRAY_SORT" | "SORT_ARRAY"
3519 | "LEFT" | "RIGHT"
3520 | "MAP_FROM_ARRAYS"
3521 | "LIKE" | "ILIKE"
3522 | "ARRAY_CONCAT"
3523 | "QUANTILE_CONT" | "QUANTILE_DISC"
3524 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
3525 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
3526 | "LOCATE" | "STRPOS" | "INSTR"
3527 | "CHAR"
3528 // CONCAT: handled separately for COALESCE wrapping
3529 | "ARRAY_JOIN"
3530 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
3531 | "ISNULL"
3532 | "MONTHNAME"
3533 | "TO_TIMESTAMP"
3534 | "TO_DATE"
3535 | "TO_JSON"
3536 | "STR_TO_DATE"
3537 | "REGEXP_SPLIT"
3538 | "SPLIT"
3539 | "FORMATDATETIME"
3540 | "ARRAYJOIN"
3541 | "SPLITBYSTRING" | "SPLITBYREGEXP"
3542 | "NVL"
3543 | "TO_CHAR"
3544 | "DBMS_RANDOM.VALUE"
3545 | "REGEXP_LIKE"
3546 | "REPLICATE"
3547 | "LEN"
3548 | "COUNT_BIG"
3549 | "DATEFROMPARTS"
3550 | "DATETIMEFROMPARTS"
3551 | "CONVERT" | "TRY_CONVERT"
3552 | "STRFTIME" | "STRPTIME"
3553 | "DATE_FORMAT" | "FORMAT_DATE"
3554 | "PARSE_TIMESTAMP" | "PARSE_DATE"
3555 | "FROM_BASE64" | "TO_BASE64"
3556 | "GETDATE"
3557 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
3558 | "TO_UTF8" | "FROM_UTF8"
3559 | "STARTS_WITH" | "STARTSWITH"
3560 | "APPROX_COUNT_DISTINCT"
3561 | "JSON_FORMAT"
3562 | "SYSDATE"
3563 | "LOGICAL_OR" | "LOGICAL_AND"
3564 | "MONTHS_ADD"
3565 | "SCHEMA_NAME"
3566 | "STRTOL"
3567 | "EDITDIST3"
3568 | "FORMAT"
3569 | "LIST_CONTAINS" | "LIST_HAS"
3570 | "VARIANCE" | "STDDEV"
3571 | "ISINF"
3572 | "TO_UNIXTIME"
3573 | "FROM_UNIXTIME"
3574 | "DATEPART" | "DATE_PART"
3575 | "DATENAME"
3576 | "STRING_AGG"
3577 | "JSON_ARRAYAGG"
3578 | "APPROX_QUANTILE"
3579 | "MAKE_DATE"
3580 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
3581 | "RANGE"
3582 | "TRY_ELEMENT_AT"
3583 | "STR_TO_MAP"
3584 | "STRING"
3585 | "TIME_TO_STR"
3586 => Action::GenericFunctionNormalize,
3587 // Functions needing specific cross-dialect transforms
3588 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3589 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
3590 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
3591 "ARRAY" if matches!(source, DialectType::BigQuery)
3592 && matches!(target, DialectType::Snowflake)
3593 && f.args.len() == 1
3594 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
3595 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
3596 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
3597 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
3598 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
3599 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3600 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
3601 // GENERATE_SERIES with interval normalization for PG target
3602 "GENERATE_SERIES" if f.args.len() >= 3
3603 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3604 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
3605 "GENERATE_SERIES" => Action::None, // passthrough for other cases
3606 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
3607 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
3608 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
3609 "CONCAT" => Action::GenericFunctionNormalize,
3610 // DIV(a, b) -> target-specific integer division
3611 "DIV" if f.args.len() == 2
3612 && matches!(source, DialectType::PostgreSQL)
3613 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
3614 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3615 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
3616 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3617 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3618 "JSONB_EXISTS" if f.args.len() == 2
3619 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
3620 // DATE_BIN -> TIME_BUCKET for DuckDB
3621 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
3622 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
3623 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
3624 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
3625 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
3626 // ClickHouse any -> ANY_VALUE for other dialects
3627 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
3628 _ => Action::None,
3629 }
3630 }
3631 }
3632 Expression::AggregateFunction(af) => {
3633 let name = af.name.to_uppercase();
3634 match name.as_str() {
3635 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
3636 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
3637 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3638 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
3639 "ARRAY_AGG" if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => Action::ArrayAggToCollectList,
3640 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
3641 "COLLECT_LIST" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB) => Action::CollectListToArrayAgg,
3642 "COLLECT_SET" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake | DialectType::DuckDB) => Action::CollectSetConvert,
3643 "PERCENTILE" if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) => Action::PercentileConvert,
3644 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
3645 "CORR" if matches!(target, DialectType::DuckDB) && matches!(source, DialectType::Snowflake) => Action::CorrIsnanWrap,
3646 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3647 "APPROX_QUANTILES" if matches!(source, DialectType::BigQuery)
3648 && matches!(target, DialectType::DuckDB) => Action::BigQueryApproxQuantiles,
3649 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3650 "PERCENTILE_CONT" if matches!(source, DialectType::BigQuery)
3651 && matches!(target, DialectType::DuckDB)
3652 && af.args.len() >= 2 => Action::BigQueryPercentileContToDuckDB,
3653 _ => Action::None,
3654 }
3655 }
3656 Expression::JSONArrayAgg(_) => {
3657 match target {
3658 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
3659 _ => Action::None,
3660 }
3661 }
3662 Expression::ToNumber(tn) => {
3663 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
3664 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
3665 match target {
3666 DialectType::Oracle | DialectType::Snowflake | DialectType::Teradata => Action::None,
3667 _ => Action::GenericFunctionNormalize,
3668 }
3669 } else {
3670 Action::None
3671 }
3672 }
3673 Expression::IfFunc(if_func) => {
3674 if matches!(source, DialectType::Snowflake)
3675 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::SQLite)
3676 && matches!(if_func.false_value, Some(Expression::Div(_))) {
3677 Action::Div0TypedDivision
3678 } else {
3679 Action::None
3680 }
3681 }
3682 Expression::ToJson(_) => {
3683 match target {
3684 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
3685 DialectType::BigQuery => Action::ToJsonConvert,
3686 DialectType::DuckDB => Action::ToJsonConvert,
3687 _ => Action::None,
3688 }
3689 }
3690 Expression::ArrayAgg(ref agg) => {
3691 if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
3692 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
3693 Action::ArrayAggToCollectList
3694 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3695 && matches!(target, DialectType::DuckDB)
3696 && agg.filter.is_some() {
3697 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
3698 // Need to add NOT x IS NULL to existing filter
3699 Action::ArrayAggNullFilter
3700 } else if matches!(target, DialectType::DuckDB)
3701 && agg.ignore_nulls == Some(true)
3702 && !agg.order_by.is_empty() {
3703 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
3704 Action::ArrayAggIgnoreNullsDuckDB
3705 } else if !matches!(source, DialectType::Snowflake) {
3706 Action::None
3707 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3708 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase()) == Some("ARRAY_AGG".to_string())
3709 || agg.name.is_none();
3710 if is_array_agg {
3711 Action::ArrayAggCollectList
3712 } else {
3713 Action::None
3714 }
3715 } else if matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino) && agg.filter.is_none() {
3716 Action::ArrayAggFilter
3717 } else {
3718 Action::None
3719 }
3720 }
3721 Expression::WithinGroup(wg) => {
3722 if matches!(source, DialectType::Snowflake)
3723 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
3724 && matches!(wg.this, Expression::ArrayAgg(_)) {
3725 Action::ArrayAggWithinGroupFilter
3726 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
3727 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
3728 || matches!(&wg.this, Expression::StringAgg(_)) {
3729 Action::StringAggConvert
3730 } else if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
3731 | DialectType::Spark | DialectType::Databricks)
3732 && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3733 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
3734 || matches!(&wg.this, Expression::PercentileCont(_))) {
3735 Action::PercentileContConvert
3736 } else {
3737 Action::None
3738 }
3739 }
3740 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
3741 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
3742 // DATETIME is the timezone-unaware type
3743 Expression::Cast(ref c) => {
3744 if c.format.is_some() && (matches!(source, DialectType::BigQuery) || matches!(source, DialectType::Teradata)) {
3745 Action::BigQueryCastFormat
3746 } else if matches!(target, DialectType::BigQuery)
3747 && !matches!(source, DialectType::BigQuery)
3748 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3749 {
3750 Action::CastTimestampToDatetime
3751 } else if matches!(source,
3752 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3753 ) && matches!(target,
3754 DialectType::Presto | DialectType::Trino | DialectType::Athena
3755 | DialectType::DuckDB | DialectType::Snowflake | DialectType::BigQuery
3756 | DialectType::Databricks | DialectType::TSQL
3757 ) {
3758 Action::HiveCastToTryCast
3759 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
3760 && matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::BigQuery) {
3761 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
3762 Action::CastTimestampStripTz
3763 } else if matches!(&c.to, DataType::Json)
3764 && matches!(&c.this, Expression::Literal(Literal::String(_)))
3765 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::Snowflake) {
3766 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3767 // Only when the input is a string literal (JSON 'value' syntax)
3768 Action::JsonLiteralToJsonParse
3769 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
3770 && matches!(target, DialectType::Spark | DialectType::Databricks) {
3771 // CAST(x AS JSON) -> TO_JSON(x) for Spark
3772 Action::CastToJsonForSpark
3773 } else if (matches!(&c.to, DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }))
3774 && matches!(target, DialectType::Spark | DialectType::Databricks)
3775 && (
3776 matches!(&c.this, Expression::ParseJson(_))
3777 || matches!(
3778 &c.this,
3779 Expression::Function(f)
3780 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
3781 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
3782 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
3783 )
3784 ) {
3785 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
3786 // -> FROM_JSON(..., type_string) for Spark
3787 Action::CastJsonToFromJson
3788 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
3789 && matches!(c.to, DataType::Timestamp { timezone: false, .. })
3790 && matches!(source, DialectType::DuckDB) {
3791 Action::StrftimeCastTimestamp
3792 } else if matches!(source, DialectType::DuckDB)
3793 && matches!(c.to, DataType::Decimal { precision: None, .. }) {
3794 Action::DecimalDefaultPrecision
3795 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
3796 && matches!(c.to, DataType::Char { length: None })
3797 && !matches!(target, DialectType::MySQL | DialectType::SingleStore) {
3798 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
3799 Action::MysqlCastCharToText
3800 } else if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3801 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3802 && Self::has_varchar_char_type(&c.to) {
3803 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
3804 Action::SparkCastVarcharToString
3805 } else {
3806 Action::None
3807 }
3808 }
3809 Expression::SafeCast(ref c) => {
3810 if c.format.is_some() && matches!(source, DialectType::BigQuery)
3811 && !matches!(target, DialectType::BigQuery)
3812 {
3813 Action::BigQueryCastFormat
3814 } else {
3815 Action::None
3816 }
3817 }
3818 // For DuckDB: DATE_TRUNC should preserve the input type
3819 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
3820 if matches!(source, DialectType::Snowflake) && matches!(target, DialectType::DuckDB) {
3821 Action::DateTruncWrapCast
3822 } else {
3823 Action::None
3824 }
3825 }
3826 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
3827 Expression::SetStatement(s) => {
3828 if matches!(target, DialectType::DuckDB)
3829 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
3830 && s.items.iter().any(|item| item.kind.is_none()) {
3831 Action::SetToVariable
3832 } else {
3833 Action::None
3834 }
3835 }
3836 // Cross-dialect NULL ordering normalization.
3837 // When nulls_first is not specified, fill in the source dialect's implied
3838 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
3839 Expression::Ordered(o) => {
3840 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
3841 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
3842 Action::MysqlNullsOrdering
3843 } else {
3844 // Skip targets that don't support NULLS FIRST/LAST syntax
3845 let target_supports_nulls = !matches!(target,
3846 DialectType::MySQL | DialectType::TSQL
3847 | DialectType::StarRocks | DialectType::Doris
3848 );
3849 if o.nulls_first.is_none() && source != target && target_supports_nulls {
3850 Action::NullsOrdering
3851 } else {
3852 Action::None
3853 }
3854 }
3855 }
3856 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
3857 Expression::DataType(dt) => {
3858 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
3859 match dt {
3860 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") || name.eq_ignore_ascii_case("FLOAT64") || name.eq_ignore_ascii_case("BOOL") || name.eq_ignore_ascii_case("BYTES") || name.eq_ignore_ascii_case("NUMERIC") || name.eq_ignore_ascii_case("STRING") || name.eq_ignore_ascii_case("DATETIME") => Action::BigQueryCastType,
3861 _ => Action::None,
3862 }
3863 } else if matches!(source, DialectType::TSQL) {
3864 // For TSQL source -> any target (including TSQL itself for REAL)
3865 match dt {
3866 // REAL -> FLOAT even for TSQL->TSQL
3867 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL")
3868 => Action::TSQLTypeNormalize,
3869 DataType::Float { real_spelling: true, .. }
3870 => Action::TSQLTypeNormalize,
3871 // Other TSQL type normalizations only for non-TSQL targets
3872 DataType::Custom { ref name } if !matches!(target, DialectType::TSQL) && (
3873 name.eq_ignore_ascii_case("MONEY")
3874 || name.eq_ignore_ascii_case("SMALLMONEY")
3875 || name.eq_ignore_ascii_case("DATETIME2")
3876 || name.eq_ignore_ascii_case("IMAGE")
3877 || name.eq_ignore_ascii_case("BIT")
3878 || name.eq_ignore_ascii_case("ROWVERSION")
3879 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
3880 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
3881 || name.to_uppercase().starts_with("NUMERIC")
3882 || name.to_uppercase().starts_with("DATETIME2(")
3883 || name.to_uppercase().starts_with("TIME(")
3884 ) => Action::TSQLTypeNormalize,
3885 DataType::Float { precision: Some(_), .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
3886 DataType::TinyInt { .. } if !matches!(target, DialectType::TSQL) => Action::TSQLTypeNormalize,
3887 // INTEGER -> INT for Databricks/Spark targets
3888 DataType::Int { integer_spelling: true, .. } if matches!(target, DialectType::Databricks | DialectType::Spark) => Action::TSQLTypeNormalize,
3889 _ => Action::None,
3890 }
3891 } else if matches!(source, DialectType::Oracle) && !matches!(target, DialectType::Oracle) {
3892 match dt {
3893 DataType::Custom { ref name } if name.to_uppercase().starts_with("VARCHAR2(") || name.to_uppercase().starts_with("NVARCHAR2(") || name.eq_ignore_ascii_case("VARCHAR2") || name.eq_ignore_ascii_case("NVARCHAR2") => Action::OracleVarchar2ToVarchar,
3894 _ => Action::None,
3895 }
3896 } else if matches!(target, DialectType::Snowflake) && !matches!(source, DialectType::Snowflake) {
3897 // When target is Snowflake but source is NOT Snowflake,
3898 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
3899 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
3900 // should keep their FLOAT spelling.
3901 match dt {
3902 DataType::Float { .. } => Action::SnowflakeFloatProtect,
3903 _ => Action::None,
3904 }
3905 } else {
3906 Action::None
3907 }
3908 }
3909 // LOWER patterns from BigQuery TO_HEX conversions:
3910 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
3911 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
3912 Expression::Lower(uf) => {
3913 if matches!(source, DialectType::BigQuery) {
3914 match &uf.this {
3915 Expression::Lower(_) => Action::BigQueryToHexLower,
3916 Expression::Function(f) if f.name == "TO_HEX" && matches!(target, DialectType::BigQuery) => {
3917 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
3918 Action::BigQueryToHexLower
3919 }
3920 _ => Action::None,
3921 }
3922 } else {
3923 Action::None
3924 }
3925 }
3926 // UPPER patterns from BigQuery TO_HEX conversions:
3927 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
3928 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
3929 Expression::Upper(uf) => {
3930 if matches!(source, DialectType::BigQuery) {
3931 match &uf.this {
3932 Expression::Lower(_) => Action::BigQueryToHexUpper,
3933 _ => Action::None,
3934 }
3935 } else {
3936 Action::None
3937 }
3938 }
3939 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
3940 // Snowflake supports LAST_DAY with unit, so keep it there
3941 Expression::LastDay(ld) => {
3942 if matches!(source, DialectType::BigQuery)
3943 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
3944 && ld.unit.is_some()
3945 {
3946 Action::BigQueryLastDayStripUnit
3947 } else {
3948 Action::None
3949 }
3950 }
3951 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
3952 Expression::SafeDivide(_) => {
3953 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
3954 Action::BigQuerySafeDivide
3955 } else {
3956 Action::None
3957 }
3958 }
3959 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3960 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3961 Expression::AnyValue(ref agg) => {
3962 if matches!(source, DialectType::BigQuery)
3963 && matches!(target, DialectType::DuckDB)
3964 && agg.having_max.is_some()
3965 {
3966 Action::BigQueryAnyValueHaving
3967 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
3968 && !matches!(source, DialectType::Spark | DialectType::Databricks)
3969 && agg.ignore_nulls.is_none()
3970 {
3971 Action::AnyValueIgnoreNulls
3972 } else {
3973 Action::None
3974 }
3975 }
3976 Expression::Any(ref q) => {
3977 if matches!(source, DialectType::PostgreSQL)
3978 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
3979 && q.op.is_some()
3980 && !matches!(q.subquery, Expression::Select(_) | Expression::Subquery(_))
3981 {
3982 Action::AnyToExists
3983 } else {
3984 Action::None
3985 }
3986 }
3987 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3988 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
3989 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
3990 Expression::RegexpLike(_) if !matches!(source, DialectType::DuckDB)
3991 && matches!(target, DialectType::DuckDB) => {
3992 Action::RegexpLikeToDuckDB
3993 }
3994 // MySQL division -> NULLIF wrapping and/or CAST for specific targets
3995 Expression::Div(ref op) if matches!(source, DialectType::MySQL)
3996 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift
3997 | DialectType::Drill | DialectType::Trino | DialectType::Presto
3998 | DialectType::TSQL | DialectType::Teradata | DialectType::SQLite
3999 | DialectType::BigQuery | DialectType::Snowflake | DialectType::Databricks
4000 | DialectType::Oracle) => {
4001 // Only wrap if RHS is not already NULLIF
4002 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF")) {
4003 Action::MySQLSafeDivide
4004 } else {
4005 Action::None
4006 }
4007 }
4008 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
4009 // For TSQL/Fabric, convert to sp_rename instead
4010 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
4011 if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
4012 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
4013 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
4014 Action::AlterTableToSpRename
4015 } else if new_tbl.schema.is_some()
4016 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
4017 | DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift) {
4018 Action::AlterTableRenameStripSchema
4019 } else {
4020 Action::None
4021 }
4022 } else {
4023 Action::None
4024 }
4025 }
4026 // EPOCH(x) expression -> target-specific epoch conversion
4027 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
4028 Action::EpochConvert
4029 }
4030 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
4031 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
4032 Action::EpochMsConvert
4033 }
4034 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
4035 Expression::StringAgg(_) => {
4036 if matches!(target, DialectType::MySQL | DialectType::SingleStore
4037 | DialectType::Doris | DialectType::StarRocks | DialectType::SQLite)
4038 {
4039 Action::StringAggConvert
4040 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4041 Action::StringAggConvert
4042 } else {
4043 Action::None
4044 }
4045 }
4046 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
4047 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
4048 Expression::GroupConcat(_) => {
4049 Action::GroupConcatConvert
4050 }
4051 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
4052 Expression::Cardinality(_) | Expression::ArrayLength(_) | Expression::ArraySize(_) => {
4053 Action::ArrayLengthConvert
4054 }
4055 // NVL: clear original_name so generator uses dialect-specific function names
4056 Expression::Nvl(f) if f.original_name.is_some() => {
4057 Action::NvlClearOriginal
4058 }
4059 // XOR: expand for dialects that don't support the XOR keyword
4060 Expression::Xor(_) => {
4061 let target_supports_xor = matches!(target,
4062 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris
4063 | DialectType::StarRocks
4064 );
4065 if !target_supports_xor {
4066 Action::XorExpand
4067 } else {
4068 Action::None
4069 }
4070 }
4071 // TSQL #table -> temp table normalization (CREATE TABLE)
4072 Expression::CreateTable(ct)
4073 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4074 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4075 && ct.name.name.name.starts_with('#') => {
4076 Action::TempTableHash
4077 }
4078 // TSQL #table -> strip # from table references in SELECT/etc.
4079 Expression::Table(tr)
4080 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4081 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4082 && tr.name.name.starts_with('#') => {
4083 Action::TempTableHash
4084 }
4085 // TSQL #table -> strip # from DROP TABLE names
4086 Expression::DropTable(ref dt)
4087 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4088 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4089 && dt.names.iter().any(|n| n.name.name.starts_with('#')) => {
4090 Action::TempTableHash
4091 }
4092 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
4093 Expression::JsonExtract(ref f) if !f.arrow_syntax && matches!(target, DialectType::SQLite | DialectType::DuckDB) => {
4094 Action::JsonExtractToArrow
4095 }
4096 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
4097 Expression::JsonExtract(ref f) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
4098 && !matches!(source, DialectType::PostgreSQL | DialectType::Redshift | DialectType::Materialize)
4099 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with("$.")) => {
4100 Action::JsonExtractToGetJsonObject
4101 }
4102 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
4103 Expression::JsonExtract(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4104 Action::JsonExtractToGetJsonObject
4105 }
4106 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4107 Expression::JsonExtractScalar(_) if matches!(target, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
4108 Action::JsonExtractScalarToGetJsonObject
4109 }
4110 // JsonQuery (parsed JSON_QUERY) -> target-specific
4111 Expression::JsonQuery(_) => {
4112 Action::JsonQueryValueConvert
4113 }
4114 // JsonValue (parsed JSON_VALUE) -> target-specific
4115 Expression::JsonValue(_) => {
4116 Action::JsonQueryValueConvert
4117 }
4118 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
4119 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
4120 Expression::AtTimeZone(_) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
4121 | DialectType::Spark | DialectType::Databricks
4122 | DialectType::BigQuery | DialectType::Snowflake) => {
4123 Action::AtTimeZoneConvert
4124 }
4125 // DAY_OF_WEEK -> dialect-specific
4126 Expression::DayOfWeek(_) if matches!(target, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks) => {
4127 Action::DayOfWeekConvert
4128 }
4129 // CURRENT_USER -> CURRENT_USER() for Snowflake
4130 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
4131 Action::CurrentUserParens
4132 }
4133 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4134 Expression::ElementAt(_) if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) => {
4135 Action::ElementAtConvert
4136 }
4137 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
4138 Expression::ArrayFunc(ref arr) if !arr.bracket_notation
4139 && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::ClickHouse | DialectType::StarRocks) => {
4140 Action::ArraySyntaxConvert
4141 }
4142 // VARIANCE expression -> varSamp for ClickHouse
4143 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
4144 Action::VarianceToClickHouse
4145 }
4146 // STDDEV expression -> stddevSamp for ClickHouse
4147 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
4148 Action::StddevToClickHouse
4149 }
4150 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4151 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
4152 Action::ApproxQuantileConvert
4153 }
4154 // MonthsBetween -> target-specific
4155 Expression::MonthsBetween(_) if !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4156 Action::MonthsBetweenConvert
4157 }
4158 // AddMonths -> target-specific DATEADD/DATE_ADD
4159 Expression::AddMonths(_) => {
4160 Action::AddMonthsConvert
4161 }
4162 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
4163 Expression::MapFromArrays(_) if !matches!(target, DialectType::Spark | DialectType::Databricks) => {
4164 Action::MapFromArraysConvert
4165 }
4166 // CURRENT_USER -> CURRENT_USER() for Spark
4167 Expression::CurrentUser(_) if matches!(target, DialectType::Spark | DialectType::Databricks) => {
4168 Action::CurrentUserSparkParens
4169 }
4170 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
4171 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
4172 if matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
4173 && matches!(&f.this, Expression::Literal(Literal::String(_)))
4174 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::PostgreSQL | DialectType::Redshift) => {
4175 Action::SparkDateFuncCast
4176 }
4177 // $parameter -> @parameter for BigQuery
4178 Expression::Parameter(ref p) if matches!(target, DialectType::BigQuery)
4179 && matches!(source, DialectType::DuckDB)
4180 && (p.style == crate::expressions::ParameterStyle::Dollar || p.style == crate::expressions::ParameterStyle::DoubleDollar) => {
4181 Action::DollarParamConvert
4182 }
4183 // EscapeString literal: normalize literal newlines to \n
4184 Expression::Literal(Literal::EscapeString(ref s)) if s.contains('\n') || s.contains('\r') || s.contains('\t') => {
4185 Action::EscapeStringNormalize
4186 }
4187 // straight_join: keep lowercase for DuckDB, quote for MySQL
4188 Expression::Column(ref col) if col.name.name == "STRAIGHT_JOIN" && col.table.is_none()
4189 && matches!(source, DialectType::DuckDB)
4190 && matches!(target, DialectType::DuckDB | DialectType::MySQL) => {
4191 Action::StraightJoinCase
4192 }
4193 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
4194 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
4195 Expression::Interval(ref iv) if matches!(target, DialectType::Snowflake | DialectType::PostgreSQL | DialectType::Redshift)
4196 && iv.unit.is_some()
4197 && matches!(&iv.this, Some(Expression::Literal(Literal::String(_)))) => {
4198 Action::SnowflakeIntervalFormat
4199 }
4200 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
4201 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
4202 if let Some(ref sample) = ts.sample {
4203 if !sample.explicit_method {
4204 Action::TablesampleReservoir
4205 } else {
4206 Action::None
4207 }
4208 } else {
4209 Action::None
4210 }
4211 }
4212 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
4213 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
4214 Expression::TableSample(ref ts) if matches!(target, DialectType::Snowflake)
4215 && !matches!(source, DialectType::Snowflake)
4216 && ts.sample.is_some() => {
4217 if let Some(ref sample) = ts.sample {
4218 if !sample.explicit_method {
4219 Action::TablesampleSnowflakeStrip
4220 } else {
4221 Action::None
4222 }
4223 } else {
4224 Action::None
4225 }
4226 }
4227 Expression::Table(ref t) if matches!(target, DialectType::Snowflake)
4228 && !matches!(source, DialectType::Snowflake)
4229 && t.table_sample.is_some() => {
4230 if let Some(ref sample) = t.table_sample {
4231 if !sample.explicit_method {
4232 Action::TablesampleSnowflakeStrip
4233 } else {
4234 Action::None
4235 }
4236 } else {
4237 Action::None
4238 }
4239 }
4240 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4241 Expression::AlterTable(ref at) if matches!(target, DialectType::TSQL | DialectType::Fabric)
4242 && !at.actions.is_empty()
4243 && matches!(at.actions.first(), Some(crate::expressions::AlterTableAction::RenameTable(_))) => {
4244 Action::AlterTableToSpRename
4245 }
4246 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
4247 Expression::Subscript(ref sub) if matches!(target, DialectType::BigQuery | DialectType::Hive | DialectType::Spark | DialectType::Databricks)
4248 && matches!(source, DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Presto | DialectType::Trino | DialectType::Redshift | DialectType::ClickHouse)
4249 && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) => {
4250 Action::ArrayIndexConvert
4251 }
4252 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
4253 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
4254 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
4255 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
4256 Expression::WindowFunction(ref wf) => {
4257 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
4258 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
4259 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
4260 if matches!(target, DialectType::BigQuery)
4261 && !is_row_number
4262 && !wf.over.order_by.is_empty()
4263 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some()) {
4264 Action::BigQueryNullsOrdering
4265 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
4266 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
4267 } else {
4268 let source_nulls_last = matches!(source, DialectType::DuckDB);
4269 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
4270 matches!(f.kind, crate::expressions::WindowFrameKind::Range | crate::expressions::WindowFrameKind::Groups)
4271 });
4272 if source_nulls_last && matches!(target, DialectType::MySQL)
4273 && !wf.over.order_by.is_empty()
4274 && wf.over.order_by.iter().any(|o| !o.desc)
4275 && !has_range_frame {
4276 Action::MysqlNullsLastRewrite
4277 } else {
4278 match &wf.this {
4279 Expression::FirstValue(ref vf) | Expression::LastValue(ref vf) if vf.ignore_nulls == Some(false) => {
4280 // RESPECT NULLS
4281 match target {
4282 DialectType::SQLite => Action::RespectNullsConvert,
4283 _ => Action::None,
4284 }
4285 }
4286 _ => Action::None,
4287 }
4288 }
4289 }
4290 }
4291 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4292 Expression::CreateTable(ref ct) if matches!(target, DialectType::DuckDB)
4293 && matches!(source, DialectType::DuckDB | DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
4294 let has_comment = ct.columns.iter().any(|c| c.comment.is_some()
4295 || c.constraints.iter().any(|con| matches!(con, crate::expressions::ColumnConstraint::Comment(_)))
4296 );
4297 let has_props = !ct.properties.is_empty();
4298 if has_comment || has_props {
4299 Action::CreateTableStripComment
4300 } else {
4301 Action::None
4302 }
4303 }
4304 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
4305 Expression::Array(_) if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
4306 Action::ArrayConcatBracketConvert
4307 }
4308 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
4309 Expression::ArrayFunc(ref arr) if arr.bracket_notation
4310 && matches!(source, DialectType::BigQuery)
4311 && matches!(target, DialectType::Redshift) => {
4312 Action::ArrayConcatBracketConvert
4313 }
4314 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
4315 Expression::BitwiseOrAgg(ref f) | Expression::BitwiseAndAgg(ref f) | Expression::BitwiseXorAgg(ref f) => {
4316 if matches!(target, DialectType::DuckDB) {
4317 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
4318 if let Expression::Cast(ref c) = f.this {
4319 match &c.to {
4320 DataType::Float { .. } | DataType::Double { .. }
4321 | DataType::Decimal { .. } => Action::BitAggFloatCast,
4322 DataType::Custom { ref name } if name.eq_ignore_ascii_case("REAL") => Action::BitAggFloatCast,
4323 _ => Action::None,
4324 }
4325 } else {
4326 Action::None
4327 }
4328 } else if matches!(target, DialectType::Snowflake) {
4329 Action::BitAggSnowflakeRename
4330 } else {
4331 Action::None
4332 }
4333 }
4334 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
4335 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
4336 Action::FilterToIff
4337 }
4338 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4339 Expression::Avg(ref f) | Expression::Sum(ref f) | Expression::Min(ref f)
4340 | Expression::Max(ref f)
4341 | Expression::CountIf(ref f) | Expression::Stddev(ref f)
4342 | Expression::StddevPop(ref f) | Expression::StddevSamp(ref f)
4343 | Expression::Variance(ref f) | Expression::VarPop(ref f)
4344 | Expression::VarSamp(ref f) | Expression::Median(ref f)
4345 | Expression::Mode(ref f) | Expression::First(ref f) | Expression::Last(ref f)
4346 | Expression::ApproxDistinct(ref f)
4347 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
4348 {
4349 Action::AggFilterToIff
4350 }
4351 Expression::Count(ref c) if c.filter.is_some() && matches!(target, DialectType::Snowflake) => {
4352 Action::AggFilterToIff
4353 }
4354 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
4355 Expression::Count(ref c) if c.distinct && matches!(&c.this, Some(Expression::Tuple(_)))
4356 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::DuckDB | DialectType::PostgreSQL) => {
4357 Action::CountDistinctMultiArg
4358 }
4359 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4360 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
4361 Action::JsonToGetPath
4362 }
4363 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
4364 Expression::Struct(_) if matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4365 && matches!(source, DialectType::DuckDB) => {
4366 Action::StructToRow
4367 }
4368 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
4369 Expression::MapFunc(ref m) if m.curly_brace_syntax
4370 && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino)
4371 && matches!(source, DialectType::DuckDB) => {
4372 Action::StructToRow
4373 }
4374 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4375 Expression::ApproxCountDistinct(_)
4376 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4377 Action::ApproxCountDistinctToApproxDistinct
4378 }
4379 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
4380 Expression::ArrayContains(_)
4381 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake) => {
4382 Action::ArrayContainsConvert
4383 }
4384 // StrPosition with position -> complex expansion for Presto/DuckDB
4385 // STRPOS doesn't support a position arg in these dialects
4386 Expression::StrPosition(ref sp) if sp.position.is_some()
4387 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
4388 Action::StrPositionExpand
4389 }
4390 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4391 Expression::First(ref f) if f.ignore_nulls == Some(true)
4392 && matches!(target, DialectType::DuckDB) => {
4393 Action::FirstToAnyValue
4394 }
4395 // BEGIN -> START TRANSACTION for Presto/Trino
4396 Expression::Command(ref cmd) if cmd.this.eq_ignore_ascii_case("BEGIN")
4397 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
4398 // Handled inline below
4399 Action::None // We'll handle it directly
4400 }
4401 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
4402 // PostgreSQL # is parsed as BitwiseXor (which is correct).
4403 // a || b (Concat operator) -> CONCAT function for Presto/Trino
4404 Expression::Concat(ref _op) if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
4405 && matches!(target, DialectType::Presto | DialectType::Trino) => {
4406 Action::PipeConcatToConcat
4407 }
4408 _ => Action::None,
4409 }
4410 };
4411
4412 match action {
4413 Action::None => {
4414 // Handle inline transforms that don't need a dedicated action
4415 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
4416 if let Expression::MethodCall(ref mc) = e {
4417 if matches!(source, DialectType::Oracle)
4418 && mc.method.name.eq_ignore_ascii_case("VALUE")
4419 && mc.args.is_empty()
4420 {
4421 let is_dbms_random = match &mc.this {
4422 Expression::Identifier(id) => id.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4423 Expression::Column(col) => col.table.is_none() && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM"),
4424 _ => false,
4425 };
4426 if is_dbms_random {
4427 let func_name = match target {
4428 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB | DialectType::SQLite => "RANDOM",
4429 DialectType::Oracle => "DBMS_RANDOM.VALUE",
4430 _ => "RAND",
4431 };
4432 return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![]))));
4433 }
4434 }
4435 }
4436 // TRIM without explicit position -> add BOTH for ClickHouse
4437 if let Expression::Trim(ref trim) = e {
4438 if matches!(target, DialectType::ClickHouse)
4439 && trim.sql_standard_syntax
4440 && trim.characters.is_some()
4441 && !trim.position_explicit
4442 {
4443 let mut new_trim = (**trim).clone();
4444 new_trim.position_explicit = true;
4445 return Ok(Expression::Trim(Box::new(new_trim)));
4446 }
4447 }
4448 // BEGIN -> START TRANSACTION for Presto/Trino
4449 if let Expression::Transaction(ref txn) = e {
4450 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4451 // Convert BEGIN to START TRANSACTION by setting mark to "START"
4452 let mut txn = txn.clone();
4453 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new("START".to_string()))));
4454 return Ok(Expression::Transaction(Box::new(*txn)));
4455 }
4456 }
4457 // IS TRUE/FALSE -> simplified forms for Presto/Trino
4458 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
4459 match &e {
4460 Expression::IsTrue(itf) if !itf.not => {
4461 // x IS TRUE -> x
4462 return Ok(itf.this.clone());
4463 }
4464 Expression::IsTrue(itf) if itf.not => {
4465 // x IS NOT TRUE -> NOT x
4466 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4467 this: itf.this.clone(),
4468 })));
4469 }
4470 Expression::IsFalse(itf) if !itf.not => {
4471 // x IS FALSE -> NOT x
4472 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4473 this: itf.this.clone(),
4474 })));
4475 }
4476 Expression::IsFalse(itf) if itf.not => {
4477 // x IS NOT FALSE -> NOT NOT x
4478 let not_x = Expression::Not(Box::new(crate::expressions::UnaryOp {
4479 this: itf.this.clone(),
4480 }));
4481 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4482 this: not_x,
4483 })));
4484 }
4485 _ => {}
4486 }
4487 }
4488 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
4489 if matches!(target, DialectType::Redshift) {
4490 if let Expression::IsFalse(ref itf) = e {
4491 if itf.not {
4492 return Ok(Expression::Not(Box::new(crate::expressions::UnaryOp {
4493 this: Expression::IsFalse(Box::new(crate::expressions::IsTrueFalse {
4494 this: itf.this.clone(),
4495 not: false,
4496 })),
4497 })));
4498 }
4499 }
4500 }
4501 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
4502 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
4503 if let Expression::Function(ref f) = e {
4504 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
4505 && matches!(source, DialectType::Snowflake)
4506 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
4507 {
4508 if f.args.len() == 3 {
4509 let mut args = f.args.clone();
4510 args.push(Expression::string("g"));
4511 return Ok(Expression::Function(Box::new(Function::new(
4512 "REGEXP_REPLACE".to_string(), args,
4513 ))));
4514 } else if f.args.len() == 4 {
4515 // 4th arg might be position, add 'g' as 5th
4516 let mut args = f.args.clone();
4517 args.push(Expression::string("g"));
4518 return Ok(Expression::Function(Box::new(Function::new(
4519 "REGEXP_REPLACE".to_string(), args,
4520 ))));
4521 }
4522 }
4523 }
4524 Ok(e)
4525 }
4526
4527 Action::GreatestLeastNull => {
4528 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4529 let mut null_checks: Vec<Expression> = f.args.iter().map(|a| {
4530 Expression::IsNull(Box::new(IsNull {
4531 this: a.clone(),
4532 not: false,
4533 postfix_form: false,
4534 }))
4535 }).collect();
4536 let condition = if null_checks.len() == 1 {
4537 null_checks.remove(0)
4538 } else {
4539 let first = null_checks.remove(0);
4540 null_checks.into_iter().fold(first, |acc, check| {
4541 Expression::Or(Box::new(BinaryOp::new(acc, check)))
4542 })
4543 };
4544 Ok(Expression::Case(Box::new(Case {
4545 operand: None,
4546 whens: vec![(condition, Expression::Null(Null))],
4547 else_: Some(Expression::Function(Box::new(Function::new(f.name, f.args)))),
4548 })))
4549 }
4550
4551 Action::ArrayGenerateRange => {
4552 let f = if let Expression::Function(f) = e { *f } else { unreachable!("action only triggered for Function expressions") };
4553 let start = f.args[0].clone();
4554 let end = f.args[1].clone();
4555 let step = f.args.get(2).cloned();
4556
4557 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
4558 end.clone(),
4559 Expression::number(1),
4560 )));
4561
4562 match target {
4563 DialectType::PostgreSQL | DialectType::Redshift => {
4564 let mut args = vec![start, end_minus_1];
4565 if let Some(s) = step { args.push(s); }
4566 Ok(Expression::Function(Box::new(Function::new(
4567 "GENERATE_SERIES".to_string(), args,
4568 ))))
4569 }
4570 DialectType::Presto | DialectType::Trino => {
4571 let mut args = vec![start, end_minus_1];
4572 if let Some(s) = step { args.push(s); }
4573 Ok(Expression::Function(Box::new(Function::new(
4574 "SEQUENCE".to_string(), args,
4575 ))))
4576 }
4577 DialectType::BigQuery => {
4578 let mut args = vec![start, end_minus_1];
4579 if let Some(s) = step { args.push(s); }
4580 Ok(Expression::Function(Box::new(Function::new(
4581 "GENERATE_ARRAY".to_string(), args,
4582 ))))
4583 }
4584 DialectType::Snowflake => {
4585 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
4586 Expression::Paren(Box::new(Paren { this: end_minus_1, trailing_comments: vec![] })),
4587 Expression::number(1),
4588 )));
4589 let mut args = vec![start, normalized_end];
4590 if let Some(s) = step { args.push(s); }
4591 Ok(Expression::Function(Box::new(Function::new(
4592 "ARRAY_GENERATE_RANGE".to_string(), args,
4593 ))))
4594 }
4595 _ => {
4596 Ok(Expression::Function(Box::new(Function::new(f.name, f.args))))
4597 }
4598 }
4599 }
4600
// DIV0-style division: `e` is the IfFunc form the function was already
// lowered to. Rewrite the false-branch division so the dividend carries an
// explicit float cast, preventing integer-division truncation on the target.
Action::Div0TypedDivision => {
    let if_func = if let Expression::IfFunc(f) = e { *f } else { unreachable!("action only triggered for IfFunc expressions") };
    if let Some(Expression::Div(div)) = if_func.false_value {
        // SQLite has no DOUBLE type; it spells the float type REAL.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float { precision: None, scale: None, real_spelling: true }
        } else {
            DataType::Double { precision: None, scale: None }
        };
        // Casting only the left operand is sufficient to force float division.
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        // Rebuild the IfFunc with the same condition/true branch and the
        // retyped division as the false branch.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(casted_left, div.right)))),
            original_name: if_func.original_name,
        })))
    } else {
        // Not actually a Div, reconstruct
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
4628
4629 Action::ArrayAggCollectList => {
4630 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4631 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4632 name: Some("COLLECT_LIST".to_string()),
4633 ..agg
4634 })))
4635 }
4636
// ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...): fold the WITHIN GROUP clause into
// the aggregate itself and add an implicit `FILTER (WHERE x IS NOT NULL)`,
// matching source dialects whose WITHIN GROUP form skips NULLs.
Action::ArrayAggWithinGroupFilter => {
    let wg = if let Expression::WithinGroup(w) = e { *w } else { unreachable!("action only triggered for WithinGroup expressions") };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        // The filter column is the aggregated expression itself.
        let col = inner_agg.this.clone();
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by.into_iter().map(|mut o| {
                // Only annotate keys that have no explicit NULLS placement.
                if o.desc && o.nulls_first.is_none() {
                    o.nulls_first = Some(true);
                }
                o
            }).collect()
        } else {
            wg.order_by
        };
        // Rebuild the aggregate: the ORDER BY moves from the WITHIN GROUP
        // clause onto the aggregate, the new filter replaces any prior one,
        // and all other attributes are preserved.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave untouched.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
4671
4672 Action::ArrayAggFilter => {
4673 let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4674 let col = agg.this.clone();
4675 let filter = Expression::IsNull(Box::new(IsNull {
4676 this: col,
4677 not: true,
4678 postfix_form: false,
4679 }));
4680 Ok(Expression::ArrayAgg(Box::new(AggFunc {
4681 filter: Some(filter),
4682 ..agg
4683 })))
4684 }
4685
Action::ArrayAggNullFilter => {
    // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
    // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
    let agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
    let col = agg.this.clone();
    let not_null = Expression::IsNull(Box::new(IsNull {
        this: col,
        not: true,
        // NOTE(review): the comment says "prefix NOT" but the field is named
        // postfix_form — confirm against IsNull's generator which rendering
        // `true` actually selects.
        postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
    }));
    // Conjoin with any pre-existing filter rather than replacing it.
    let new_filter = if let Some(existing_filter) = agg.filter {
        // AND the NOT IS NULL with existing filter
        Expression::And(Box::new(crate::expressions::BinaryOp::new(
            existing_filter,
            not_null,
        )))
    } else {
        not_null
    };
    Ok(Expression::ArrayAgg(Box::new(AggFunc {
        filter: Some(new_filter),
        ..agg
    })))
}
4710
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Only fire on ARRAY(<SELECT AS STRUCT ...>) — a single subquery
        // argument whose SELECT kind is "STRUCT".
        let is_match = f.args.len() == 1 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!("argument already verified to be a Select expression"),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    // `expr AS alias` -> key from the alias, value from expr.
                    Expression::Alias(a) => {
                        let key = Expression::Literal(Literal::String(a.alias.name.clone()));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    // Bare column -> key from the column name.
                    Expression::Column(c) => {
                        let key = Expression::Literal(Literal::String(c.name.name.clone()));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    // Anything else is passed positionally with no key.
                    _ => {
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(), oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(), vec![object_construct],
            )));
            // Carry the inner SELECT's FROM/WHERE/GROUP BY/HAVING/JOINs over
            // to the new single-expression SELECT.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap in a bare (unaliased, unmodified) scalar subquery.
            Ok(Expression::Subquery(Box::new(crate::expressions::Subquery {
                this: Expression::Select(Box::new(new_select)),
                alias: None,
                column_aliases: Vec::new(),
                order_by: None,
                limit: None,
                offset: None,
                distribute_by: None,
                sort_by: None,
                cluster_by: None,
                lateral: false,
                modifiers_inside: false,
                trailing_comments: Vec::new(),
            })))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
4775
4776 Action::BigQueryPercentileContToDuckDB => {
4777 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
4778 if let Expression::AggregateFunction(mut af) = e {
4779 af.name = "QUANTILE_CONT".to_string();
4780 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
4781 // Keep only first 2 args
4782 if af.args.len() > 2 {
4783 af.args.truncate(2);
4784 }
4785 Ok(Expression::AggregateFunction(af))
4786 } else {
4787 Ok(e)
4788 }
4789 }
4790
4791 Action::ArrayAggIgnoreNullsDuckDB => {
4792 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
4793 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
4794 let mut agg = if let Expression::ArrayAgg(a) = e { *a } else { unreachable!("action only triggered for ArrayAgg expressions") };
4795 agg.ignore_nulls = None; // Strip IGNORE NULLS
4796 if !agg.order_by.is_empty() {
4797 agg.order_by[0].nulls_first = Some(true);
4798 }
4799 Ok(Expression::ArrayAgg(Box::new(agg)))
4800 }
4801
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    // Emulates multi-argument COUNT(DISTINCT ...) on targets that only accept
    // one argument: a row where ANY argument is NULL must not be counted,
    // which the IS NULL WHEN-arms reproduce before tupling the values.
    if let Expression::Count(c) = e {
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr = Expression::Tuple(Box::new(crate::expressions::Tuple {
                expressions: args,
            }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
            }));
            // Force DISTINCT; carry filter / ignore_nulls / original_name over.
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
            })))
        } else {
            // Single-argument or COUNT(*): nothing to rewrite.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
4844
4845 Action::CastTimestampToDatetime => {
4846 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
4847 Ok(Expression::Cast(Box::new(Cast {
4848 to: DataType::Custom { name: "DATETIME".to_string() },
4849 ..c
4850 })))
4851 }
4852
4853 Action::CastTimestampStripTz => {
4854 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
4855 let c = if let Expression::Cast(c) = e { *c } else { unreachable!("action only triggered for Cast expressions") };
4856 Ok(Expression::Cast(Box::new(Cast {
4857 to: DataType::Timestamp { precision: None, timezone: false },
4858 ..c
4859 })))
4860 }
4861
4862 Action::ToDateToCast => {
4863 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
4864 if let Expression::Function(f) = e {
4865 let arg = f.args.into_iter().next().unwrap();
4866 Ok(Expression::Cast(Box::new(Cast {
4867 this: arg,
4868 to: DataType::Date,
4869 double_colon_syntax: false,
4870 trailing_comments: vec![],
4871 format: None,
4872 default: None,
4873 })))
4874 } else {
4875 Ok(e)
4876 }
4877 }
Action::DateTruncWrapCast => {
    // Handle both Expression::DateTrunc/TimestampTrunc and
    // Expression::Function("DATE_TRUNC", [unit, expr])
    //
    // When the truncated operand is itself a CAST, re-wrap the whole
    // DATE_TRUNC in a CAST back to that type so the result type round-trips.
    // TIME operands get special handling: DATE_TRUNC needs a date-bearing
    // value, so the time is anchored to the epoch date (DATE '1970-01-01' +
    // time) before truncation, then cast back to TIME.
    // Note: a TimestampTrunc input is re-emitted as DateTrunc in every branch.
    match e {
        Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
            // Capture the operand's cast target, if any.
            let input_type = match &d.this {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // Anchor the TIME value to the epoch date.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                    }));
                    let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                    let inner = Expression::DateTrunc(Box::new(DateTruncFunc { this: add_expr, unit: d.unit }));
                    Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                } else {
                    // Non-TIME cast: simply wrap the truncation in a cast back
                    // to the operand's original type.
                    let inner = Expression::DateTrunc(Box::new(*d));
                    Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                }
            } else {
                // Operand is not a CAST: nothing to re-wrap.
                Ok(Expression::DateTrunc(d))
            }
        }
        Expression::Function(f) if f.args.len() == 2 => {
            // Function-based DATE_TRUNC(unit, expr)
            let input_type = match &f.args[1] {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // Same epoch-date anchoring as the expression form above.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(crate::expressions::Literal::String("1970-01-01".to_string())),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                    }));
                    let mut args = f.args;
                    let unit_arg = args.remove(0);
                    let time_expr = args.remove(0);
                    let add_expr = Expression::Add(Box::new(BinaryOp::new(date_expr, time_expr)));
                    let inner = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![unit_arg, add_expr],
                    )));
                    Ok(Expression::Cast(Box::new(Cast { this: inner, to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                } else {
                    // Wrap the function in CAST
                    Ok(Expression::Cast(Box::new(Cast { this: Expression::Function(f), to: cast_type, double_colon_syntax: false, trailing_comments: vec![], format: None, default: None })))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        other => Ok(other),
    }
}
4946
4947 Action::RegexpReplaceSnowflakeToDuckDB => {
4948 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
4949 if let Expression::Function(f) = e {
4950 let mut args = f.args;
4951 let subject = args.remove(0);
4952 let pattern = args.remove(0);
4953 let replacement = args.remove(0);
4954 Ok(Expression::Function(Box::new(Function::new(
4955 "REGEXP_REPLACE".to_string(),
4956 vec![subject, pattern, replacement, Expression::Literal(crate::expressions::Literal::String("g".to_string()))],
4957 ))))
4958 } else {
4959 Ok(e)
4960 }
4961 }
4962
Action::SetToVariable => {
    // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
    if let Expression::SetStatement(mut s) = e {
        for item in &mut s.items {
            // Only untagged assignments are rewritten; items that already
            // carry a kind are left alone.
            if item.kind.is_none() {
                // Check if name already has VARIABLE prefix (from DuckDB source parsing)
                let already_variable = match &item.name {
                    Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
                    _ => false,
                };
                if already_variable {
                    // Extract the actual name and set kind
                    // (strip the literal "VARIABLE " prefix from the identifier
                    // so it is not emitted twice).
                    if let Expression::Identifier(ref mut id) = item.name {
                        let actual_name = id.name["VARIABLE ".len()..].to_string();
                        id.name = actual_name;
                    }
                }
                item.kind = Some("VARIABLE".to_string());
            }
        }
        Ok(Expression::SetStatement(s))
    } else {
        Ok(e)
    }
}
4988
Action::ConvertTimezoneToExpr => {
    // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
    // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            // 2-arg form: CONVERT_TIMEZONE(target_tz, timestamp);
            // the source timezone stays None.
            let mut args = f.args;
            let target_tz = args.remove(0);
            let timestamp = args.remove(0);
            Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                source_tz: None,
                target_tz: Some(Box::new(target_tz)),
                timestamp: Some(Box::new(timestamp)),
                options: vec![],
            })))
        } else if f.args.len() == 3 {
            // 3-arg form: CONVERT_TIMEZONE(source_tz, target_tz, timestamp).
            let mut args = f.args;
            let source_tz = args.remove(0);
            let target_tz = args.remove(0);
            let timestamp = args.remove(0);
            Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                source_tz: Some(Box::new(source_tz)),
                target_tz: Some(Box::new(target_tz)),
                timestamp: Some(Box::new(timestamp)),
                options: vec![],
            })))
        } else {
            // Any other arity is passed through untouched.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
5021
Action::BigQueryCastType => {
    // Convert BigQuery types to standard SQL types
    // (INT64/FLOAT64/BOOL/BYTES/NUMERIC/STRING/DATETIME arrive as Custom
    // because the BigQuery tokenizer does not map them to built-in DataTypes).
    if let Expression::DataType(dt) = e {
        match dt {
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
                Ok(Expression::DataType(DataType::BigInt { length: None }))
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("FLOAT64") => {
                Ok(Expression::DataType(DataType::Double { precision: None, scale: None }))
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
                Ok(Expression::DataType(DataType::Boolean))
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
                Ok(Expression::DataType(DataType::VarBinary { length: None }))
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("NUMERIC") => {
                // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
                // default precision (18, 3) being added to bare DECIMAL
                if matches!(target, DialectType::DuckDB) {
                    Ok(Expression::DataType(DataType::Custom { name: "DECIMAL".to_string() }))
                } else {
                    Ok(Expression::DataType(DataType::Decimal { precision: None, scale: None }))
                }
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("STRING") => {
                Ok(Expression::DataType(DataType::String { length: None }))
            }
            DataType::Custom { ref name } if name.eq_ignore_ascii_case("DATETIME") => {
                Ok(Expression::DataType(DataType::Timestamp { precision: None, timezone: false }))
            }
            // Any other type (built-in or unrecognized Custom) passes through.
            _ => Ok(Expression::DataType(dt)),
        }
    } else {
        Ok(e)
    }
}
5059
5060 Action::BigQuerySafeDivide => {
5061 // Convert SafeDivide expression to IF/CASE form for most targets
5062 if let Expression::SafeDivide(sd) = e {
5063 let x = *sd.this;
5064 let y = *sd.expression;
5065 // Wrap x and y in parens if they're complex expressions
5066 let y_ref = match &y {
5067 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
5068 _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
5069 };
5070 let x_ref = match &x {
5071 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
5072 _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
5073 };
5074 let condition = Expression::Neq(Box::new(BinaryOp::new(y_ref.clone(), Expression::number(0))));
5075 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
5076
5077 if matches!(target, DialectType::Presto | DialectType::Trino) {
5078 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
5079 let cast_x = Expression::Cast(Box::new(Cast {
5080 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5081 to: DataType::Double { precision: None, scale: None },
5082 trailing_comments: vec![],
5083 double_colon_syntax: false,
5084 format: None,
5085 default: None,
5086 }));
5087 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) })));
5088 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5089 condition,
5090 true_value: cast_div,
5091 false_value: Some(Expression::Null(Null)),
5092 original_name: None,
5093 })))
5094 } else if matches!(target, DialectType::PostgreSQL) {
5095 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
5096 let cast_x = Expression::Cast(Box::new(Cast {
5097 this: match &x { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x, _ => Expression::Paren(Box::new(Paren { this: x, trailing_comments: vec![] })) },
5098 to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
5099 trailing_comments: vec![],
5100 double_colon_syntax: false,
5101 format: None,
5102 default: None,
5103 }));
5104 let y_paren = match &y { Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y, _ => Expression::Paren(Box::new(Paren { this: y, trailing_comments: vec![] })) };
5105 let cast_div = Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
5106 Ok(Expression::Case(Box::new(Case {
5107 operand: None,
5108 whens: vec![(condition, cast_div)],
5109 else_: Some(Expression::Null(Null)),
5110 })))
5111 } else if matches!(target, DialectType::DuckDB) {
5112 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
5113 Ok(Expression::Case(Box::new(Case {
5114 operand: None,
5115 whens: vec![(condition, div_expr)],
5116 else_: Some(Expression::Null(Null)),
5117 })))
5118 } else if matches!(target, DialectType::Snowflake) {
5119 // Snowflake: IFF(y <> 0, x / y, NULL)
5120 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5121 condition,
5122 true_value: div_expr,
5123 false_value: Some(Expression::Null(Null)),
5124 original_name: Some("IFF".to_string()),
5125 })))
5126 } else {
5127 // All others: IF(y <> 0, x / y, NULL)
5128 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
5129 condition,
5130 true_value: div_expr,
5131 false_value: Some(Expression::Null(Null)),
5132 original_name: None,
5133 })))
5134 }
5135 } else {
5136 Ok(e)
5137 }
5138 }
5139
// LAST_DAY(date [, MONTH]): drop the explicit unit (MONTH is the default
// everywhere) and lower to the target's equivalent construct.
Action::BigQueryLastDayStripUnit => {
    if let Expression::LastDay(mut ld) = e {
        ld.unit = None; // Strip the unit (MONTH is default)
        match target {
            DialectType::PostgreSQL => {
                // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                // (Postgres has no LAST_DAY; start-of-next-month minus one day.)
                let date_trunc = Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(crate::expressions::Literal::String("MONTH".to_string())),
                        ld.this.clone(),
                    ],
                )));
                let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date_trunc,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(crate::expressions::Literal::String("1 MONTH".to_string()))),
                        unit: None,
                    })),
                )));
                let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(crate::expressions::Literal::String("1 DAY".to_string()))),
                        unit: None,
                    })),
                )));
                // DATE_TRUNC returns a timestamp; cast back down to DATE.
                Ok(Expression::Cast(Box::new(Cast {
                    this: minus_day,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            DialectType::Presto => {
                // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY_OF_MONTH".to_string(),
                    vec![ld.this],
                ))))
            }
            DialectType::ClickHouse => {
                // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                // Need to wrap the DATE type in Nullable
                // (only rewrites an operand that is already a Cast; bare
                // operands are left as-is).
                let nullable_date = match ld.this {
                    Expression::Cast(mut c) => {
                        c.to = DataType::Custom { name: "Nullable(DATE)".to_string() };
                        Expression::Cast(c)
                    }
                    other => other,
                };
                ld.this = nullable_date;
                Ok(Expression::LastDay(ld))
            }
            // Targets with a native LAST_DAY keep the expression (minus unit).
            _ => Ok(Expression::LastDay(ld)),
        }
    } else {
        Ok(e)
    }
}
5202
Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Pull (operand, target type, FORMAT expression, SAFE?) out of either
    // cast form; anything without a FORMAT clause is returned untouched.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => {
            (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), false)
        }
        Expression::SafeCast(ref c) if c.format.is_some() => {
            (c.this.clone(), c.to.clone(), c.format.as_ref().unwrap().as_ref().clone(), true)
        }
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the SQL FORMAT picture into a strftime-style pattern.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), func_args))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            // SAFE_CAST maps to the non-throwing TRY_STRPTIME variant.
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(parse_fn_name.to_string(), vec![this, duck_fmt])));
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // Other targets: keep the original cast (with its FORMAT clause).
        _ => Ok(e),
    }
}
5268
Action::BigQueryFunctionNormalize => {
    // Delegated: BigQuery function-name normalization covers a large function
    // table and lives in its own associated helper.
    Self::normalize_bigquery_function(e, source, target)
}
5272
Action::BigQueryToHexBare => {
    // Not used anymore - handled directly in normalize_bigquery_function
    // (kept as an identity pass-through; TODO: retire this Action variant).
    Ok(e)
}
5277
// Clean up LOWER(...) wrappers introduced by earlier TO_HEX/HEX rewrites.
Action::BigQueryToHexLower => {
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            // (the wrapper is redundant for BigQuery's TO_HEX output).
            Expression::Function(f) if matches!(target, DialectType::BigQuery) && f.name == "TO_HEX" => {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: extract TO_HEX
                    // (whatever function is innermost is renamed to TO_HEX,
                    // keeping its arguments).
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args))))
                    } else {
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            // Any other operand: rebuild the LOWER wrapper; note original_name
            // is reset to None here.
            other => Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc { this: other, original_name: None })))
        }
    } else {
        Ok(e)
    }
}
5305
Action::BigQueryToHexUpper => {
    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
    if let Expression::Upper(uf) = e {
        if let Expression::Lower(inner_uf) = uf.this {
            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
            if matches!(target, DialectType::BigQuery) {
                // Restore TO_HEX name in inner function
                if let Expression::Function(f) = inner_uf.this {
                    let restored = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), f.args)));
                    Ok(Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(restored))))
                } else {
                    // Non-function operand: UPPER(LOWER(x)) collapses to
                    // UPPER(x), which is equivalent for case-folding.
                    Ok(Expression::Upper(inner_uf))
                }
            } else {
                // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                Ok(inner_uf.this)
            }
        } else {
            // No LOWER wrapper: leave the UPPER call untouched.
            Ok(Expression::Upper(uf))
        }
    } else {
        Ok(e)
    }
}
5331
5332 Action::BigQueryAnyValueHaving => {
5333 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
5334 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
5335 if let Expression::AnyValue(agg) = e {
5336 if let Some((having_expr, is_max)) = agg.having_max {
5337 let func_name = if is_max { "ARG_MAX_NULL" } else { "ARG_MIN_NULL" };
5338 Ok(Expression::Function(Box::new(Function::new(
5339 func_name.to_string(),
5340 vec![agg.this, *having_expr],
5341 ))))
5342 } else {
5343 Ok(Expression::AnyValue(agg))
5344 }
5345 } else {
5346 Ok(e)
5347 }
5348 }
5349
5350 Action::BigQueryApproxQuantiles => {
5351 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
5352 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
5353 if let Expression::AggregateFunction(agg) = e {
5354 if agg.args.len() >= 2 {
5355 let x_expr = agg.args[0].clone();
5356 let n_expr = &agg.args[1];
5357
5358 // Extract the numeric value from n_expr
5359 let n = match n_expr {
5360 Expression::Literal(crate::expressions::Literal::Number(s)) => s.parse::<usize>().unwrap_or(2),
5361 _ => 2,
5362 };
5363
5364 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
5365 let mut quantiles = Vec::new();
5366 for i in 0..=n {
5367 let q = i as f64 / n as f64;
5368 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
5369 if q == 0.0 {
5370 quantiles.push(Expression::number(0));
5371 } else if q == 1.0 {
5372 quantiles.push(Expression::number(1));
5373 } else {
5374 quantiles.push(Expression::Literal(crate::expressions::Literal::Number(format!("{}", q))));
5375 }
5376 }
5377
5378 let array_expr = Expression::Array(Box::new(crate::expressions::Array {
5379 expressions: quantiles,
5380 }));
5381
5382 // Preserve DISTINCT modifier
5383 let mut new_func = Function::new("APPROX_QUANTILE".to_string(), vec![x_expr, array_expr]);
5384 new_func.distinct = agg.distinct;
5385 Ok(Expression::Function(Box::new(new_func)))
5386 } else {
5387 Ok(Expression::AggregateFunction(agg))
5388 }
5389 } else {
5390 Ok(e)
5391 }
5392 }
5393
5394 Action::GenericFunctionNormalize => {
5395 // Helper closure to convert ARBITRARY to target-specific function
5396 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
5397 let name = match target {
5398 DialectType::ClickHouse => "any",
5399 DialectType::TSQL | DialectType::SQLite => "MAX",
5400 DialectType::Hive => "FIRST",
5401 DialectType::Presto | DialectType::Trino | DialectType::Athena => "ARBITRARY",
5402 _ => "ANY_VALUE",
5403 };
5404 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
5405 }
5406
5407 if let Expression::Function(f) = e {
5408 let name = f.name.to_uppercase();
5409 match name.as_str() {
5410 "ARBITRARY" if f.args.len() == 1 => {
5411 let arg = f.args.into_iter().next().unwrap();
5412 Ok(convert_arbitrary(arg, target))
5413 }
5414 "TO_NUMBER" if f.args.len() == 1 => {
5415 let arg = f.args.into_iter().next().unwrap();
5416 match target {
5417 DialectType::Oracle | DialectType::Snowflake => {
5418 Ok(Expression::Function(Box::new(Function::new("TO_NUMBER".to_string(), vec![arg]))))
5419 }
5420 _ => {
5421 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
5422 this: arg,
5423 to: crate::expressions::DataType::Double { precision: None, scale: None },
5424 double_colon_syntax: false,
5425 trailing_comments: Vec::new(),
5426 format: None,
5427 default: None,
5428 })))
5429 }
5430 }
5431 }
5432 "AGGREGATE" if f.args.len() >= 3 => {
5433 match target {
5434 DialectType::DuckDB | DialectType::Hive | DialectType::Presto | DialectType::Trino => {
5435 Ok(Expression::Function(Box::new(Function::new("REDUCE".to_string(), f.args))))
5436 }
5437 _ => Ok(Expression::Function(f)),
5438 }
5439 }
5440 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
5441 "REGEXP_MATCHES" if f.args.len() >= 2 => {
5442 if matches!(target, DialectType::DuckDB) {
5443 Ok(Expression::Function(f))
5444 } else {
5445 let mut args = f.args;
5446 let this = args.remove(0);
5447 let pattern = args.remove(0);
5448 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5449 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5450 this,
5451 pattern,
5452 flags,
5453 })))
5454 }
5455 }
5456 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
5457 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
5458 if matches!(target, DialectType::DuckDB) {
5459 Ok(Expression::Function(f))
5460 } else {
5461 let mut args = f.args;
5462 let this = args.remove(0);
5463 let pattern = args.remove(0);
5464 let flags = if args.is_empty() { None } else { Some(args.remove(0)) };
5465 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
5466 this,
5467 pattern,
5468 flags,
5469 })))
5470 }
5471 }
            // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
            "STRUCT_EXTRACT" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let field_expr = args.remove(0);
                // Extract string literal to get field name; an identifier arg also works.
                // Any other expression kind can't be turned into a static field name,
                // so the call is re-emitted unchanged.
                let field_name = match &field_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => return Ok(Expression::Function(Box::new(Function::new("STRUCT_EXTRACT".to_string(), vec![this, field_expr])))),
                };
                Ok(Expression::StructExtract(Box::new(crate::expressions::StructExtractFunc {
                    this,
                    field: crate::expressions::Identifier::new(field_name),
                })))
            }
            // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
            // DuckDB keeps its native name; everything else gets the common FILTER spelling.
            "LIST_FILTER" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::DuckDB => "LIST_FILTER",
                    _ => "FILTER",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
            "LIST_TRANSFORM" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::DuckDB => "LIST_TRANSFORM",
                    _ => "TRANSFORM",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x); args pass through untouched.
            "LIST_SORT" if f.args.len() >= 1 => {
                let name = match target {
                    DialectType::DuckDB | DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
                    _ => "SORT_ARRAY",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
            "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
                match target {
                    DialectType::DuckDB => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // SORT_ARRAY's second arg FALSE means descending order.
                        let mut args = f.args;
                        args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
                        Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                        // i.e. a comparator lambda that inverts the natural ordering.
                        // NOTE(review): only the first arg is kept; extras are dropped here.
                        let arr = f.args.into_iter().next().unwrap();
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![
                                crate::expressions::Identifier::new("a"),
                                crate::expressions::Identifier::new("b"),
                            ],
                            body: Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![
                                    (
                                        Expression::Lt(Box::new(BinaryOp::new(
                                            Expression::Identifier(crate::expressions::Identifier::new("a")),
                                            Expression::Identifier(crate::expressions::Identifier::new("b")),
                                        ))),
                                        Expression::number(1),
                                    ),
                                    (
                                        Expression::Gt(Box::new(BinaryOp::new(
                                            Expression::Identifier(crate::expressions::Identifier::new("a")),
                                            Expression::Identifier(crate::expressions::Identifier::new("b")),
                                        ))),
                                        // NOTE(review): the ARRAY_REVERSE_SORT arm encodes -1 as
                                        // Neg(number(1)); here it is a "-1" literal — confirm both
                                        // render identically.
                                        Expression::Literal(Literal::Number("-1".to_string())),
                                    ),
                                ],
                                else_: Some(Expression::number(0)),
                            })),
                            colon: false,
                            parameter_types: Vec::new(),
                        }));
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("LIST_REVERSE_SORT".to_string(), f.args)))),
                }
            }
            // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename.
            // The comma contains no regex metacharacters, so passing it straight to
            // the regex-based Spark/Hive SPLIT is safe without \Q...\E quoting.
            "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
                let mut args = f.args;
                args.push(Expression::string(","));
                let name = match target {
                    DialectType::DuckDB => "STR_SPLIT",
                    DialectType::Presto | DialectType::Trino => "SPLIT",
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
                    DialectType::PostgreSQL => "STRING_TO_ARRAY",
                    DialectType::Redshift => "SPLIT_TO_ARRAY",
                    _ => "SPLIT",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
            }
5571 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
5572 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
5573 let name = match target {
5574 DialectType::DuckDB => "STR_SPLIT",
5575 DialectType::Presto | DialectType::Trino => "SPLIT",
5576 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
5577 DialectType::PostgreSQL => "STRING_TO_ARRAY",
5578 DialectType::Redshift => "SPLIT_TO_ARRAY",
5579 _ => "SPLIT",
5580 };
5581 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
5582 }
            // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
            "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
                let name = match target {
                    DialectType::DuckDB => "STR_SPLIT",
                    DialectType::Presto | DialectType::Trino => "SPLIT",
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
                    DialectType::Doris | DialectType::StarRocks => "SPLIT_BY_STRING",
                    DialectType::PostgreSQL | DialectType::Redshift => "STRING_TO_ARRAY",
                    _ => "SPLIT",
                };
                // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
                if matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                    // NOTE(review): args beyond (string, separator) are dropped on this path.
                    let mut args = f.args;
                    let x = args.remove(0);
                    let sep = args.remove(0);
                    // Wrap separator in CONCAT('\\Q', sep, '\\E')
                    let escaped_sep = Expression::Function(Box::new(Function::new(
                        "CONCAT".to_string(),
                        vec![
                            Expression::string("\\Q"),
                            sep,
                            Expression::string("\\E"),
                        ],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![x, escaped_sep]))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                }
            }
            // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split.
            // No \Q...\E quoting here: the separator is already a regex on both sides.
            "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::DuckDB => "STR_SPLIT_REGEX",
                    DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
                    _ => "REGEXP_SPLIT",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark.
            // This arm only fires for literal-separator sources going to regex-separator targets.
            "SPLIT" if f.args.len() == 2
                && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena
                    | DialectType::StarRocks | DialectType::Doris)
                && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
                // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
                let mut args = f.args;
                let x = args.remove(0);
                let sep = args.remove(0);
                // Quote the separator with \Q...\E so it is matched literally.
                let escaped_sep = Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    vec![
                        Expression::string("\\Q"),
                        sep,
                        Expression::string("\\E"),
                    ],
                )));
                Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![x, escaped_sep]))))
            }
            // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
            // For ClickHouse target, preserve original name to maintain camelCase
            "SUBSTRINGINDEX" => {
                let name = if matches!(target, DialectType::ClickHouse) {
                    // Preserve the original spelling (casing) from the parsed function.
                    f.name.clone()
                } else {
                    "SUBSTRING_INDEX".to_string()
                };
                Ok(Expression::Function(Box::new(Function::new(name, f.args))))
            }
            // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
            "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
                // Get the array argument (first arg, drop dimension args)
                let mut args = f.args;
                let arr = if args.is_empty() {
                    // Zero args: re-emit under the original (outer-scope) matched name unchanged.
                    return Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))));
                } else {
                    args.remove(0)
                };
                let name = match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SIZE",
                    DialectType::Presto | DialectType::Trino => "CARDINALITY",
                    DialectType::BigQuery => "ARRAY_LENGTH",
                    DialectType::DuckDB => {
                        // DuckDB: use ARRAY_LENGTH with all args
                        let mut all_args = vec![arr];
                        all_args.extend(args);
                        return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // Keep ARRAY_LENGTH with dimension arg
                        let mut all_args = vec![arr];
                        all_args.extend(args);
                        return Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), all_args))));
                    }
                    DialectType::ClickHouse => "LENGTH",
                    _ => "ARRAY_LENGTH",
                };
                // All non-early-return targets take only the array argument;
                // dimension args (if any) are intentionally discarded.
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arr]))))
            }
            // UNICODE(x) -> target-specific codepoint function
            "UNICODE" if f.args.len() == 1 => {
                match target {
                    DialectType::SQLite | DialectType::DuckDB => {
                        // Native UNICODE() support: keep as-is.
                        Ok(Expression::Function(Box::new(Function::new("UNICODE".to_string(), f.args))))
                    }
                    DialectType::Oracle => {
                        // ASCII(UNISTR(x))
                        let inner = Expression::Function(Box::new(Function::new("UNISTR".to_string(), f.args)));
                        Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), vec![inner]))))
                    }
                    DialectType::MySQL => {
                        // ORD(CONVERT(x USING utf32))
                        let arg = f.args.into_iter().next().unwrap();
                        let convert_expr = Expression::ConvertToCharset(Box::new(crate::expressions::ConvertToCharset {
                            this: Box::new(arg),
                            dest: Some(Box::new(Expression::Identifier(crate::expressions::Identifier::new("utf32")))),
                            source: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new("ORD".to_string(), vec![convert_expr]))))
                    }
                    _ => {
                        // Fallback: ASCII(x).
                        // NOTE(review): ASCII only matches UNICODE for single-byte
                        // characters on some engines — confirm acceptable for all defaults.
                        Ok(Expression::Function(Box::new(Function::new("ASCII".to_string(), f.args))))
                    }
                }
            }
            // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
            "XOR" if f.args.len() >= 2 => {
                match target {
                    DialectType::ClickHouse => {
                        // ClickHouse: keep as xor() function with lowercase name
                        Ok(Expression::Function(Box::new(Function::new("xor".to_string(), f.args))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        if f.args.len() == 2 {
                            Ok(Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), f.args))))
                        } else {
                            // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c) — left-fold over the args.
                            let mut args = f.args;
                            let first = args.remove(0);
                            let second = args.remove(0);
                            let mut result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![first, second])));
                            for arg in args {
                                result = Expression::Function(Box::new(Function::new("BITWISE_XOR".to_string(), vec![result, arg])));
                            }
                            Ok(result)
                        }
                    }
                    DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                        // Convert XOR(a, b, c) -> Expression::Xor with expressions list
                        // (rendered as the infix `a XOR b XOR c` form at generation time).
                        let args = f.args;
                        Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                            this: None,
                            expression: None,
                            expressions: args,
                        })))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: a # b (hash operator for XOR); left-fold for 3+ args.
                        let mut args = f.args;
                        let first = args.remove(0);
                        let second = args.remove(0);
                        let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
                        for arg in args {
                            result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
                        }
                        Ok(result)
                    }
                    DialectType::DuckDB => {
                        // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
                        Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: a ^ b (caret operator for XOR); left-fold for 3+ args.
                        let mut args = f.args;
                        let first = args.remove(0);
                        let second = args.remove(0);
                        let mut result = Expression::BitwiseXor(Box::new(BinaryOp::new(first, second)));
                        for arg in args {
                            result = Expression::BitwiseXor(Box::new(BinaryOp::new(result, arg)));
                        }
                        Ok(result)
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("XOR".to_string(), f.args)))),
                }
            }
            // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
            // (DuckDB-named twin of the LIST_REVERSE_SORT arm above.)
            "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // SORT_ARRAY's second arg FALSE means descending order.
                        let mut args = f.args;
                        args.push(Expression::Identifier(crate::expressions::Identifier::new("FALSE")));
                        Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                        // NOTE(review): only the first arg is kept; extras are dropped here.
                        let arr = f.args.into_iter().next().unwrap();
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![
                                Identifier::new("a"),
                                Identifier::new("b"),
                            ],
                            colon: false,
                            parameter_types: Vec::new(),
                            body: Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![
                                    (
                                        Expression::Lt(Box::new(BinaryOp::new(
                                            Expression::Identifier(Identifier::new("a")),
                                            Expression::Identifier(Identifier::new("b")),
                                        ))),
                                        Expression::number(1),
                                    ),
                                    (
                                        Expression::Gt(Box::new(BinaryOp::new(
                                            Expression::Identifier(Identifier::new("a")),
                                            Expression::Identifier(Identifier::new("b")),
                                        ))),
                                        // -1 encoded as unary negation of 1 (cf. the "-1"
                                        // literal used by the LIST_REVERSE_SORT arm).
                                        Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                            this: Expression::number(1),
                                        })),
                                    ),
                                ],
                                else_: Some(Expression::number(0)),
                            })),
                        }));
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr, lambda]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), f.args)))),
                }
            }
            // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
            "ENCODE" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Spark/Hive ENCODE requires an explicit charset; default to utf-8.
                        let mut args = f.args;
                        args.push(Expression::string("utf-8"));
                        Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new("TO_UTF8".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), f.args)))),
                }
            }
            // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
            // (single-arg only, so Oracle-style conditional DECODE is unaffected).
            "DECODE" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        let mut args = f.args;
                        args.push(Expression::string("utf-8"));
                        Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new("FROM_UTF8".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), f.args)))),
                }
            }
            // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
            "QUANTILE" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE",
                    DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
                    // NOTE(review): BigQuery PERCENTILE_CONT is an analytic function that
                    // needs an OVER clause — confirm a plain rename produces valid SQL.
                    DialectType::BigQuery => "PERCENTILE_CONT",
                    _ => "QUANTILE",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
            "QUANTILE_CONT" if f.args.len() == 2 => {
                let mut args = f.args;
                let column = args.remove(0);
                let quantile = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // DuckDB-native: keep the two-arg form.
                        Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile]))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
                        // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
                        // The measured column is cloned: once as the percentile subject,
                        // once as the WITHIN GROUP ordering key.
                        let inner = Expression::PercentileCont(Box::new(crate::expressions::PercentileFunc {
                            this: column.clone(),
                            percentile: quantile,
                            order_by: None,
                            filter: None,
                        }));
                        Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                            this: inner,
                            order_by: vec![crate::expressions::Ordered {
                                this: column,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            }],
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![column, quantile])))),
                }
            }
            // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
            // (discrete twin of the QUANTILE_CONT arm above).
            "QUANTILE_DISC" if f.args.len() == 2 => {
                let mut args = f.args;
                let column = args.remove(0);
                let quantile = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile]))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake => {
                        // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
                        let inner = Expression::PercentileDisc(Box::new(crate::expressions::PercentileFunc {
                            this: column.clone(),
                            percentile: quantile,
                            order_by: None,
                            filter: None,
                        }));
                        Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                            this: inner,
                            order_by: vec![crate::expressions::Ordered {
                                this: column,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            }],
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("QUANTILE_DISC".to_string(), vec![column, quantile])))),
                }
            }
            // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
            "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
                let name = match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "PERCENTILE_APPROX",
                    DialectType::DuckDB => "APPROX_QUANTILE",
                    // NOTE(review): PostgreSQL/Redshift PERCENTILE_CONT normally needs
                    // WITHIN GROUP (cf. QUANTILE_CONT arm) — confirm a plain rename suffices.
                    DialectType::PostgreSQL | DialectType::Redshift => "PERCENTILE_CONT",
                    // Unknown target: preserve whichever of the two names was parsed.
                    _ => &f.name,
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
            "EPOCH" if f.args.len() == 1 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
                    DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
                    _ => "EPOCH",
                };
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
            }
            // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
            "EPOCH_MS" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), f.args))))
                    }
                    DialectType::Hive => {
                        // Hive: FROM_UNIXTIME(x / 1000)
                        // NOTE(review): integer division truncates sub-second precision —
                        // confirm that is the intended behavior (vs. dividing by 1000.0).
                        let arg = f.args.into_iter().next().unwrap();
                        let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                            arg,
                            Expression::number(1000),
                        )));
                        Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div_expr]))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Same FROM_UNIXTIME(x / 1000) shape, built inline.
                        Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(),
                            vec![Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                f.args.into_iter().next().unwrap(),
                                Expression::number(1000),
                            )))]
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), f.args)))),
                }
            }
            // HASHBYTES('algorithm', x) -> target-specific hash function
            "HASHBYTES" if f.args.len() == 2 => {
                // Keep HASHBYTES as-is for TSQL target
                if matches!(target, DialectType::TSQL) {
                    return Ok(Expression::Function(f));
                }
                // The algorithm must be a string literal so it can be inspected at
                // transpile time; otherwise the call is passed through unchanged.
                let algo_expr = &f.args[0];
                let algo = match algo_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => s.to_uppercase(),
                    _ => return Ok(Expression::Function(f)),
                };
                let data_arg = f.args.into_iter().nth(1).unwrap();
                match algo.as_str() {
                    "SHA1" => {
                        let name = match target {
                            DialectType::Spark | DialectType::Databricks => "SHA",
                            DialectType::Hive => "SHA1",
                            _ => "SHA1",
                        };
                        Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![data_arg]))))
                    }
                    "SHA2_256" => {
                        // SHA2(x, 256)
                        Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(256)]))))
                    }
                    "SHA2_512" => {
                        // SHA2(x, 512)
                        Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![data_arg, Expression::number(512)]))))
                    }
                    "MD5" => {
                        Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![data_arg]))))
                    }
                    // Unknown algorithm: re-emit HASHBYTES.
                    // NOTE(review): this re-emits the uppercased algorithm string, which may
                    // differ in case from the original literal — confirm acceptable.
                    _ => Ok(Expression::Function(Box::new(Function::new("HASHBYTES".to_string(), vec![Expression::string(&algo), data_arg])))),
                }
            }
            // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
            "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
                // _TEXT variant extracts a scalar as text; the plain variant keeps JSON.
                let is_text = name == "JSON_EXTRACT_PATH_TEXT";
                let mut args = f.args;
                let json_expr = args.remove(0);
                // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
                let mut json_path = "$".to_string();
                for a in &args {
                    match a {
                        Expression::Literal(crate::expressions::Literal::String(s)) => {
                            // Numeric string keys become array indices: [0]
                            if s.chars().all(|c| c.is_ascii_digit()) {
                                json_path.push('[');
                                json_path.push_str(s);
                                json_path.push(']');
                            } else {
                                json_path.push('.');
                                json_path.push_str(s);
                            }
                        }
                        _ => {
                            // Non-literal key: cannot be folded into a static path.
                            // NOTE(review): ".?" is a placeholder, not valid JSONPath for the
                            // dynamic key — confirm how dynamic keys should be handled.
                            json_path.push_str(".?");
                        }
                    }
                }
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::BigQuery | DialectType::MySQL => {
                        let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Materialize => {
                        // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
                        // (these take the keys as varargs, so the original key args are reused).
                        let func_name = if is_text { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            new_args,
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
                        if is_text {
                            Ok(Expression::JsonExtractScalar(Box::new(crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path: Expression::string(&json_path),
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            })))
                        } else {
                            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path: Expression::string(&json_path),
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            })))
                        }
                    }
                    DialectType::Redshift => {
                        // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
                        // (Redshift only offers the _TEXT form; varargs keys reused).
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            new_args,
                        ))))
                    }
                    DialectType::TSQL => {
                        // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
                        // JSON_QUERY returns objects/arrays, JSON_VALUE returns scalars;
                        // ISNULL picks whichever matched.
                        let jq = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![json_expr.clone(), Expression::string(&json_path)],
                        )));
                        let jv = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![jq, jv],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse JSONExtract* functions take the keys as varargs.
                        let func_name = if is_text { "JSONExtractString" } else { "JSONExtractRaw" };
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            new_args,
                        ))))
                    }
                    _ => {
                        let func_name = if is_text { "JSON_EXTRACT_SCALAR" } else { "JSON_EXTRACT" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                }
            }
            // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
            "APPROX_DISTINCT" if f.args.len() >= 1 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
                    _ => "APPROX_DISTINCT",
                };
                let mut args = f.args;
                // Hive doesn't support the accuracy parameter
                // NOTE(review): BigQuery APPROX_COUNT_DISTINCT is also single-arg —
                // confirm whether the accuracy arg should be truncated there too.
                if name == "APPROX_COUNT_DISTINCT" && matches!(target, DialectType::Hive) {
                    args.truncate(1);
                }
                Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
            }
            // REGEXP_EXTRACT(x, pattern) - normalize default group index
            // When the implicit capture-group default differs between source and target,
            // the source's default is made explicit as a third argument.
            "REGEXP_EXTRACT" if f.args.len() == 2 => {
                // Determine source default group index
                let source_default = match source {
                    DialectType::Presto | DialectType::Trino | DialectType::DuckDB => 0,
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                // Determine target default group index
                let target_default = match target {
                    DialectType::Presto | DialectType::Trino | DialectType::DuckDB
                    | DialectType::BigQuery => 0,
                    DialectType::Snowflake => {
                        // Snowflake uses REGEXP_SUBSTR
                        return Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), f.args))));
                    }
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                if source_default != target_default {
                    let mut args = f.args;
                    args.push(Expression::number(source_default));
                    Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), f.args))))
                }
            }
            // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
            "RLIKE" if f.args.len() == 2 => {
                let mut args = f.args;
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // REGEXP_MATCHES(str, pattern) — DuckDB's boolean regex-match function.
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_MATCHES".to_string(),
                            vec![str_expr, pattern],
                        ))))
                    }
                    _ => {
                        // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
                        Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
                            this: str_expr,
                            pattern,
                            flags: None,
                        })))
                    }
                }
            }
6179 // EOMONTH(date[, month_offset]) -> target-specific
6180 "EOMONTH" if f.args.len() >= 1 => {
6181 let mut args = f.args;
6182 let date_arg = args.remove(0);
6183 let month_offset = if !args.is_empty() { Some(args.remove(0)) } else { None };
6184
6185 // Helper: wrap date in CAST to DATE
6186 let cast_to_date = |e: Expression| -> Expression {
6187 Expression::Cast(Box::new(Cast {
6188 this: e,
6189 to: DataType::Date,
6190 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6191 }))
6192 };
6193
6194 match target {
6195 DialectType::TSQL | DialectType::Fabric => {
6196 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
6197 let date = cast_to_date(date_arg);
6198 let date = if let Some(offset) = month_offset {
6199 Expression::Function(Box::new(Function::new(
6200 "DATEADD".to_string(), vec![
6201 Expression::Identifier(Identifier::new("MONTH")),
6202 offset, date,
6203 ],
6204 )))
6205 } else {
6206 date
6207 };
6208 Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![date]))))
6209 }
6210 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
6211 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
6212 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
6213 let cast_ts = Expression::Cast(Box::new(Cast {
6214 this: date_arg,
6215 to: DataType::Timestamp { timezone: false, precision: None },
6216 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6217 }));
6218 let date = cast_to_date(cast_ts);
6219 let date = if let Some(offset) = month_offset {
6220 Expression::Function(Box::new(Function::new(
6221 "DATE_ADD".to_string(), vec![
6222 Expression::string("MONTH"),
6223 offset, date,
6224 ],
6225 )))
6226 } else {
6227 date
6228 };
6229 Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![date]))))
6230 }
6231 DialectType::PostgreSQL => {
6232 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
6233 let date = cast_to_date(date_arg);
6234 let date = if let Some(offset) = month_offset {
6235 let interval_str = format!("{} MONTH", Self::expr_to_string_static(&offset));
6236 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6237 date,
6238 Expression::Interval(Box::new(crate::expressions::Interval {
6239 this: Some(Expression::string(&interval_str)),
6240 unit: None,
6241 })),
6242 )))
6243 } else {
6244 date
6245 };
6246 let truncated = Expression::Function(Box::new(Function::new(
6247 "DATE_TRUNC".to_string(), vec![Expression::string("MONTH"), date],
6248 )));
6249 let plus_month = Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6250 truncated,
6251 Expression::Interval(Box::new(crate::expressions::Interval {
6252 this: Some(Expression::string("1 MONTH")),
6253 unit: None,
6254 })),
6255 )));
6256 let minus_day = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
6257 plus_month,
6258 Expression::Interval(Box::new(crate::expressions::Interval {
6259 this: Some(Expression::string("1 DAY")),
6260 unit: None,
6261 })),
6262 )));
6263 Ok(Expression::Cast(Box::new(Cast {
6264 this: minus_day,
6265 to: DataType::Date,
6266 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6267 })))
6268 }
6269 DialectType::DuckDB => {
6270 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
6271 let date = cast_to_date(date_arg);
6272 let date = if let Some(offset) = month_offset {
6273 // Wrap negative numbers in parentheses for DuckDB INTERVAL
6274 let interval_val = if matches!(&offset, Expression::Neg(_)) {
6275 Expression::Paren(Box::new(crate::expressions::Paren {
6276 this: offset,
6277 trailing_comments: Vec::new(),
6278 }))
6279 } else {
6280 offset
6281 };
6282 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
6283 date,
6284 Expression::Interval(Box::new(crate::expressions::Interval {
6285 this: Some(interval_val),
6286 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
6287 unit: crate::expressions::IntervalUnit::Month,
6288 use_plural: false,
6289 }),
6290 })),
6291 )))
6292 } else {
6293 date
6294 };
6295 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6296 }
6297 DialectType::Snowflake | DialectType::Redshift => {
6298 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
6299 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
6300 let date = if matches!(target, DialectType::Snowflake) {
6301 Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])))
6302 } else {
6303 cast_to_date(date_arg)
6304 };
6305 let date = if let Some(offset) = month_offset {
6306 Expression::Function(Box::new(Function::new(
6307 "DATEADD".to_string(), vec![
6308 Expression::Identifier(Identifier::new("MONTH")),
6309 offset, date,
6310 ],
6311 )))
6312 } else {
6313 date
6314 };
6315 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6316 }
6317 DialectType::Spark | DialectType::Databricks => {
6318 // Spark: LAST_DAY(TO_DATE(date))
6319 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
6320 let date = Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![date_arg])));
6321 let date = if let Some(offset) = month_offset {
6322 Expression::Function(Box::new(Function::new(
6323 "ADD_MONTHS".to_string(), vec![date, offset],
6324 )))
6325 } else {
6326 date
6327 };
6328 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6329 }
6330 DialectType::MySQL => {
6331 // MySQL: LAST_DAY(DATE(date)) - no offset
6332 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
6333 let date = if let Some(offset) = month_offset {
6334 let iu = crate::expressions::IntervalUnit::Month;
6335 Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
6336 this: date_arg,
6337 interval: offset,
6338 unit: iu,
6339 }))
6340 } else {
6341 Expression::Function(Box::new(Function::new("DATE".to_string(), vec![date_arg])))
6342 };
6343 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6344 }
6345 DialectType::BigQuery => {
6346 // BigQuery: LAST_DAY(CAST(date AS DATE))
6347 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
6348 let date = cast_to_date(date_arg);
6349 let date = if let Some(offset) = month_offset {
6350 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
6351 this: Some(offset),
6352 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
6353 unit: crate::expressions::IntervalUnit::Month,
6354 use_plural: false,
6355 }),
6356 }));
6357 Expression::Function(Box::new(Function::new(
6358 "DATE_ADD".to_string(), vec![date, interval],
6359 )))
6360 } else {
6361 date
6362 };
6363 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6364 }
6365 DialectType::ClickHouse => {
6366 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
6367 let date = Expression::Cast(Box::new(Cast {
6368 this: date_arg,
6369 to: DataType::Custom { name: "Nullable(DATE)".to_string() },
6370 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
6371 }));
6372 let date = if let Some(offset) = month_offset {
6373 Expression::Function(Box::new(Function::new(
6374 "DATE_ADD".to_string(), vec![
6375 Expression::Identifier(Identifier::new("MONTH")),
6376 offset, date,
6377 ],
6378 )))
6379 } else {
6380 date
6381 };
6382 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6383 }
6384 DialectType::Hive => {
6385 // Hive: LAST_DAY(date)
6386 let date = if let Some(offset) = month_offset {
6387 Expression::Function(Box::new(Function::new(
6388 "ADD_MONTHS".to_string(), vec![date_arg, offset],
6389 )))
6390 } else {
6391 date_arg
6392 };
6393 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6394 }
6395 _ => {
6396 // Default: LAST_DAY(date)
6397 let date = if let Some(offset) = month_offset {
6398 let unit = Expression::Identifier(Identifier::new("MONTH"));
6399 Expression::Function(Box::new(Function::new(
6400 "DATEADD".to_string(), vec![unit, offset, date_arg],
6401 )))
6402 } else {
6403 date_arg
6404 };
6405 Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![date]))))
6406 }
6407 }
6408 }
6409 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
6410 "LAST_DAY" | "LAST_DAY_OF_MONTH" if !matches!(source, DialectType::BigQuery) && f.args.len() >= 1 => {
6411 let first_arg = f.args.into_iter().next().unwrap();
6412 match target {
6413 DialectType::TSQL | DialectType::Fabric => Ok(Expression::Function(Box::new(Function::new("EOMONTH".to_string(), vec![first_arg])))),
6414 DialectType::Presto | DialectType::Trino | DialectType::Athena => Ok(Expression::Function(Box::new(Function::new("LAST_DAY_OF_MONTH".to_string(), vec![first_arg])))),
6415 _ => Ok(Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![first_arg])))),
6416 }
6417 }
6418 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
6419 "MAP" if f.args.len() == 2
6420 && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6421 let keys_arg = f.args[0].clone();
6422 let vals_arg = f.args[1].clone();
6423
6424 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
6425 fn extract_array_elements(expr: &Expression) -> Option<&Vec<Expression>> {
6426 match expr {
6427 Expression::Array(arr) => Some(&arr.expressions),
6428 Expression::ArrayFunc(arr) => Some(&arr.expressions),
6429 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => Some(&f.args),
6430 _ => None,
6431 }
6432 }
6433
6434 match target {
6435 DialectType::Spark | DialectType::Databricks => {
6436 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
6437 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
6438 }
6439 DialectType::Hive => {
6440 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
6441 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6442 if keys.len() == vals.len() {
6443 let mut interleaved = Vec::new();
6444 for (k, v) in keys.iter().zip(vals.iter()) {
6445 interleaved.push(k.clone());
6446 interleaved.push(v.clone());
6447 }
6448 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), interleaved))))
6449 } else {
6450 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6451 }
6452 } else {
6453 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6454 }
6455 }
6456 DialectType::Snowflake => {
6457 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
6458 if let (Some(keys), Some(vals)) = (extract_array_elements(&keys_arg), extract_array_elements(&vals_arg)) {
6459 if keys.len() == vals.len() {
6460 let mut interleaved = Vec::new();
6461 for (k, v) in keys.iter().zip(vals.iter()) {
6462 interleaved.push(k.clone());
6463 interleaved.push(v.clone());
6464 }
6465 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), interleaved))))
6466 } else {
6467 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6468 }
6469 } else {
6470 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6471 }
6472 }
6473 _ => Ok(Expression::Function(f)),
6474 }
6475 }
6476 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
6477 "MAP" if f.args.is_empty()
6478 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks)
6479 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
6480 let empty_keys = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6481 let empty_vals = Expression::Array(Box::new(crate::expressions::Array { expressions: vec![] }));
6482 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![empty_keys, empty_vals]))))
6483 }
6484 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
6485 "MAP" if f.args.len() >= 2 && f.args.len() % 2 == 0
6486 && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::ClickHouse) => {
6487 let args = f.args;
6488 match target {
6489 DialectType::DuckDB => {
6490 // MAP([k1, k2], [v1, v2])
6491 let mut keys = Vec::new();
6492 let mut vals = Vec::new();
6493 for (i, arg) in args.into_iter().enumerate() {
6494 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6495 }
6496 let keys_arr = Expression::Array(Box::new(crate::expressions::Array {
6497 expressions: keys,
6498 }));
6499 let vals_arr = Expression::Array(Box::new(crate::expressions::Array {
6500 expressions: vals,
6501 }));
6502 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6503 }
6504 DialectType::Presto | DialectType::Trino => {
6505 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
6506 let mut keys = Vec::new();
6507 let mut vals = Vec::new();
6508 for (i, arg) in args.into_iter().enumerate() {
6509 if i % 2 == 0 { keys.push(arg); } else { vals.push(arg); }
6510 }
6511 let keys_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: keys }));
6512 let vals_arr = Expression::Array(Box::new(crate::expressions::Array { expressions: vals }));
6513 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), vec![keys_arr, vals_arr]))))
6514 }
6515 DialectType::Snowflake => {
6516 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), args))))
6517 }
6518 DialectType::ClickHouse => {
6519 Ok(Expression::Function(Box::new(Function::new("map".to_string(), args))))
6520 }
6521 _ => Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), args)))),
6522 }
6523 }
6524 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
6525 "COLLECT_LIST" if f.args.len() >= 1 => {
6526 let name = match target {
6527 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_LIST",
6528 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift
6529 | DialectType::Snowflake | DialectType::BigQuery => "ARRAY_AGG",
6530 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
6531 _ => "ARRAY_AGG",
6532 };
6533 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6534 }
6535 // COLLECT_SET(x) -> target-specific distinct array aggregation
6536 "COLLECT_SET" if f.args.len() >= 1 => {
6537 let name = match target {
6538 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "COLLECT_SET",
6539 DialectType::Presto | DialectType::Trino | DialectType::Athena => "SET_AGG",
6540 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
6541 _ => "ARRAY_AGG",
6542 };
6543 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6544 }
6545 // ISNAN(x) / IS_NAN(x) - normalize
6546 "ISNAN" | "IS_NAN" => {
6547 let name = match target {
6548 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "ISNAN",
6549 DialectType::Presto | DialectType::Trino | DialectType::Athena => "IS_NAN",
6550 DialectType::BigQuery | DialectType::PostgreSQL | DialectType::Redshift => "IS_NAN",
6551 DialectType::ClickHouse => "IS_NAN",
6552 _ => "ISNAN",
6553 };
6554 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
6555 }
6556 // SPLIT_PART(str, delim, index) -> target-specific
6557 "SPLIT_PART" if f.args.len() == 3 => {
6558 match target {
6559 DialectType::Spark | DialectType::Databricks => {
6560 // Keep as SPLIT_PART (Spark 3.4+)
6561 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6562 }
6563 DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Snowflake
6564 | DialectType::Redshift | DialectType::Trino | DialectType::Presto => {
6565 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6566 }
6567 DialectType::Hive => {
6568 // SPLIT(str, delim)[index]
6569 // Complex conversion, just keep as-is for now
6570 Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args))))
6571 }
6572 _ => Ok(Expression::Function(Box::new(Function::new("SPLIT_PART".to_string(), f.args)))),
6573 }
6574 }
        // JSON_EXTRACT(json, path) -> target-specific JSON extraction
        // JSON_EXTRACT returns a JSON fragment while JSON_EXTRACT_SCALAR
        // returns the value as text; targets that distinguish the two
        // (TSQL, PostgreSQL/Redshift) choose their function name from
        // `is_scalar` below.
        "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
            let is_scalar = name == "JSON_EXTRACT_SCALAR";
            match target {
                DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                    let mut args = f.args;
                    // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
                    // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
                    if let Some(Expression::Function(inner)) = args.first() {
                        if inner.name.eq_ignore_ascii_case("TRY") && inner.args.len() == 1 {
                            let mut inner_args = inner.args.clone();
                            args[0] = inner_args.remove(0);
                        }
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_JSON_OBJECT".to_string(),
                        args,
                    ))))
                }
                DialectType::DuckDB | DialectType::SQLite => {
                    // json -> path syntax
                    // Emit the infix arrow operator instead of a call; the
                    // JsonExtract node with arrow_syntax=true renders `json -> path`.
                    let mut args = f.args;
                    let json_expr = args.remove(0);
                    let path = args.remove(0);
                    Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path,
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    })))
                }
                DialectType::TSQL => {
                    // TSQL: JSON_VALUE for scalars, JSON_QUERY for objects/arrays.
                    let func_name = if is_scalar { "JSON_VALUE" } else { "JSON_QUERY" };
                    Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    // PostgreSQL: _PATH_TEXT returns text, _PATH returns json.
                    let func_name = if is_scalar { "JSON_EXTRACT_PATH_TEXT" } else { "JSON_EXTRACT_PATH" };
                    Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), f.args))))
                }
                _ => {
                    // All other targets keep the original function name.
                    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
                }
            }
        }
6624 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
6625 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
6626 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON" if f.args.len() >= 2
6627 && matches!(source, DialectType::SingleStore) => {
6628 let is_bson = name == "BSON_EXTRACT_BSON";
6629 let mut args = f.args;
6630 let json_expr = args.remove(0);
6631
6632 // Build JSONPath from remaining arguments
6633 let mut path = String::from("$");
6634 for arg in &args {
6635 if let Expression::Literal(crate::expressions::Literal::String(s)) = arg {
6636 // Check if it's a numeric string (array index)
6637 if s.parse::<i64>().is_ok() {
6638 path.push('[');
6639 path.push_str(s);
6640 path.push(']');
6641 } else {
6642 path.push('.');
6643 path.push_str(s);
6644 }
6645 }
6646 }
6647
6648 let target_func = if is_bson { "JSONB_EXTRACT" } else { "JSON_EXTRACT" };
6649 Ok(Expression::Function(Box::new(Function::new(
6650 target_func.to_string(),
6651 vec![json_expr, Expression::string(&path)],
6652 ))))
6653 }
6654 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
6655 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
6656 Ok(Expression::Function(Box::new(Function {
6657 name: "arraySum".to_string(),
6658 args: f.args,
6659 distinct: f.distinct,
6660 trailing_comments: f.trailing_comments,
6661 use_bracket_syntax: f.use_bracket_syntax,
6662 no_parens: f.no_parens,
6663 quoted: f.quoted,
6664 })))
6665 }
6666 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
6667 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
6668 // and is handled by JsonQueryValueConvert action. This handles the case where
6669 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
6670 "JSON_QUERY" | "JSON_VALUE" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
6671 match target {
6672 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6673 Ok(Expression::Function(Box::new(Function::new(
6674 "GET_JSON_OBJECT".to_string(),
6675 f.args,
6676 ))))
6677 }
6678 _ => Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args)))),
6679 }
6680 }
        // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
        "UNIX_TIMESTAMP" if f.args.len() == 1 => {
            let arg = f.args.into_iter().next().unwrap();
            // Hive-family sources give string arguments the implicit format
            // 'yyyy-MM-dd HH:mm:ss'; targets must reproduce that parse step
            // explicitly, hence the dedicated branches below.
            let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);
            match target {
                DialectType::DuckDB if is_hive_source => {
                    // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
                    let strptime = Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                    )));
                    Ok(Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![strptime]))))
                }
                DialectType::Presto | DialectType::Trino if is_hive_source => {
                    // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
                    // Two-stage fallback: first try to parse x as a formatted
                    // string (TRY turns a parse failure into NULL); if that
                    // yields NULL, re-format x (covering timestamp-typed
                    // inputs) and parse the result back.
                    let cast_varchar = Expression::Cast(Box::new(crate::expressions::Cast {
                        this: arg.clone(),
                        to: DataType::VarChar { length: None, parenthesized_length: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let date_parse = Expression::Function(Box::new(Function::new(
                        "DATE_PARSE".to_string(),
                        vec![cast_varchar, Expression::string("%Y-%m-%d %T")],
                    )));
                    let try_expr = Expression::Function(Box::new(Function::new(
                        "TRY".to_string(), vec![date_parse],
                    )));
                    let date_format = Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![arg, Expression::string("%Y-%m-%d %T")],
                    )));
                    let parse_datetime = Expression::Function(Box::new(Function::new(
                        "PARSE_DATETIME".to_string(),
                        vec![date_format, Expression::string("yyyy-MM-dd HH:mm:ss")],
                    )));
                    let coalesce = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(), vec![try_expr, parse_datetime],
                    )));
                    Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![coalesce]))))
                }
                DialectType::Presto | DialectType::Trino => {
                    // Non-Hive sources: a plain rename is sufficient.
                    Ok(Expression::Function(Box::new(Function::new("TO_UNIXTIME".to_string(), vec![arg]))))
                }
                _ => Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), vec![arg])))),
            }
        }
6730 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
6731 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => {
6732 match target {
6733 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6734 Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
6735 }
6736 _ => Ok(Expression::Function(Box::new(Function::new("TO_UNIX_TIMESTAMP".to_string(), f.args)))),
6737 }
6738 }
6739 // CURDATE() -> CURRENT_DATE
6740 "CURDATE" => {
6741 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
6742 }
6743 // CURTIME() -> CURRENT_TIME
6744 "CURTIME" => {
6745 Ok(Expression::CurrentTime(crate::expressions::CurrentTime { precision: None }))
6746 }
6747 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
6748 "ARRAY_SORT" if f.args.len() >= 1 => {
6749 match target {
6750 DialectType::Hive => {
6751 let mut args = f.args;
6752 args.truncate(1); // Drop lambda comparator
6753 Ok(Expression::Function(Box::new(Function::new("SORT_ARRAY".to_string(), args))))
6754 }
6755 _ => Ok(Expression::Function(f)),
6756 }
6757 }
6758 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive
6759 "SORT_ARRAY" if f.args.len() == 1 => {
6760 match target {
6761 DialectType::Hive => Ok(Expression::Function(f)),
6762 _ => {
6763 Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), f.args))))
6764 }
6765 }
6766 }
        // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
        "SORT_ARRAY" if f.args.len() == 2 => {
            // Second argument is Hive's ascending flag: a literal FALSE means
            // descending. Anything else (TRUE or a non-boolean expression)
            // falls into the ascending path below.
            let is_desc = matches!(&f.args[1], Expression::Boolean(b) if !b.value);
            if is_desc {
                match target {
                    DialectType::DuckDB => {
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_REVERSE_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto has no descending flag; build the comparator
                        // lambda (a, b) -> CASE WHEN a < b THEN 1
                        //                       WHEN a > b THEN -1 ELSE 0 END
                        // whose inverted sign yields descending order.
                        let arr_arg = f.args.into_iter().next().unwrap();
                        let a = Expression::Column(crate::expressions::Column {
                            name: crate::expressions::Identifier::new("a"),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                        });
                        let b = Expression::Column(crate::expressions::Column {
                            name: crate::expressions::Identifier::new("b"),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                        });
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![
                                (Expression::Lt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
                                Expression::Literal(Literal::Number("1".to_string()))),
                                (Expression::Gt(Box::new(BinaryOp::new(a.clone(), b.clone()))),
                                Expression::Literal(Literal::Number("-1".to_string()))),
                            ],
                            else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
                        }));
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![
                                crate::expressions::Identifier::new("a"),
                                crate::expressions::Identifier::new("b"),
                            ],
                            body: case_expr,
                            colon: false,
                            parameter_types: Vec::new(),
                        }));
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![arr_arg, lambda]))))
                    }
                    _ => Ok(Expression::Function(f))
                }
            } else {
                // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
                // Ascending: drop the flag for non-Hive targets; Hive keeps
                // the original call untouched.
                match target {
                    DialectType::Hive => Ok(Expression::Function(f)),
                    _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_SORT".to_string(), vec![f.args.into_iter().next().unwrap()]))))
                }
            }
        }
6820 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
6821 "LEFT" if f.args.len() == 2 => {
6822 match target {
6823 DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
6824 let x = f.args[0].clone();
6825 let n = f.args[1].clone();
6826 Ok(Expression::Function(Box::new(Function::new(
6827 "SUBSTRING".to_string(),
6828 vec![x, Expression::number(1), n],
6829 ))))
6830 }
6831 DialectType::Spark | DialectType::Databricks
6832 if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
6833 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
6834 let x = f.args[0].clone();
6835 let n = f.args[1].clone();
6836 let cast_x = Expression::Cast(Box::new(Cast {
6837 this: x,
6838 to: DataType::VarChar { length: None, parenthesized_length: false },
6839 double_colon_syntax: false,
6840 trailing_comments: Vec::new(),
6841 format: None,
6842 default: None,
6843 }));
6844 Ok(Expression::Function(Box::new(Function::new("LEFT".to_string(), vec![cast_x, n]))))
6845 }
6846 _ => Ok(Expression::Function(f)),
6847 }
6848 }
6849 "RIGHT" if f.args.len() == 2 => {
6850 match target {
6851 DialectType::Hive | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
6852 let x = f.args[0].clone();
6853 let n = f.args[1].clone();
6854 // SUBSTRING(x, LENGTH(x) - (n - 1))
6855 let len_x = Expression::Function(Box::new(Function::new(
6856 "LENGTH".to_string(),
6857 vec![x.clone()],
6858 )));
6859 let n_minus_1 = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
6860 n,
6861 Expression::number(1),
6862 )));
6863 let n_minus_1_paren = Expression::Paren(Box::new(crate::expressions::Paren {
6864 this: n_minus_1,
6865 trailing_comments: Vec::new(),
6866 }));
6867 let offset = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
6868 len_x,
6869 n_minus_1_paren,
6870 )));
6871 Ok(Expression::Function(Box::new(Function::new(
6872 "SUBSTRING".to_string(),
6873 vec![x, offset],
6874 ))))
6875 }
6876 DialectType::Spark | DialectType::Databricks
6877 if matches!(source, DialectType::TSQL | DialectType::Fabric) => {
6878 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
6879 let x = f.args[0].clone();
6880 let n = f.args[1].clone();
6881 let cast_x = Expression::Cast(Box::new(Cast {
6882 this: x,
6883 to: DataType::VarChar { length: None, parenthesized_length: false },
6884 double_colon_syntax: false,
6885 trailing_comments: Vec::new(),
6886 format: None,
6887 default: None,
6888 }));
6889 Ok(Expression::Function(Box::new(Function::new("RIGHT".to_string(), vec![cast_x, n]))))
6890 }
6891 _ => Ok(Expression::Function(f)),
6892 }
6893 }
6894 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
6895 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
6896 match target {
6897 DialectType::Snowflake => {
6898 Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
6899 }
6900 DialectType::Spark | DialectType::Databricks => {
6901 Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
6902 }
6903 _ => {
6904 Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
6905 }
6906 }
6907 }
6908 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
6909 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
6910 "LIKE" if f.args.len() >= 2 => {
6911 let (this, pattern) = if matches!(source, DialectType::SQLite) {
6912 // SQLite: LIKE(pattern, string) -> string LIKE pattern
6913 (f.args[1].clone(), f.args[0].clone())
6914 } else {
6915 // Standard: LIKE(string, pattern) -> string LIKE pattern
6916 (f.args[0].clone(), f.args[1].clone())
6917 };
6918 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
6919 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
6920 left: this,
6921 right: pattern,
6922 escape,
6923 quantifier: None,
6924 })))
6925 }
6926 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
6927 "ILIKE" if f.args.len() >= 2 => {
6928 let this = f.args[0].clone();
6929 let pattern = f.args[1].clone();
6930 let escape = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
6931 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
6932 left: this,
6933 right: pattern,
6934 escape,
6935 quantifier: None,
6936 })))
6937 }
6938 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
6939 "CHAR" if f.args.len() == 1 => {
6940 match target {
6941 DialectType::MySQL | DialectType::SingleStore
6942 | DialectType::TSQL => Ok(Expression::Function(f)),
6943 _ => {
6944 Ok(Expression::Function(Box::new(Function::new("CHR".to_string(), f.args))))
6945 }
6946 }
6947 }
6948 // CONCAT(a, b) -> a || b for PostgreSQL
6949 "CONCAT" if f.args.len() == 2 && matches!(target, DialectType::PostgreSQL)
6950 && matches!(source, DialectType::ClickHouse | DialectType::MySQL) => {
6951 let mut args = f.args;
6952 let right = args.pop().unwrap();
6953 let left = args.pop().unwrap();
6954 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
6955 this: Box::new(left),
6956 expression: Box::new(right),
6957 safe: None,
6958 })))
6959 }
6960 // ARRAY_TO_STRING(arr, delim) -> target-specific
6961 "ARRAY_TO_STRING" if f.args.len() >= 2 => {
6962 match target {
6963 DialectType::Presto | DialectType::Trino => {
6964 Ok(Expression::Function(Box::new(Function::new("ARRAY_JOIN".to_string(), f.args))))
6965 }
6966 DialectType::TSQL => {
6967 Ok(Expression::Function(Box::new(Function::new("STRING_AGG".to_string(), f.args))))
6968 }
6969 _ => Ok(Expression::Function(f)),
6970 }
6971 }
6972 // ARRAY_CONCAT -> target-specific
6973 "ARRAY_CONCAT" if f.args.len() == 2 => {
6974 match target {
6975 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
6976 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
6977 }
6978 DialectType::Snowflake => {
6979 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
6980 }
6981 DialectType::Redshift => {
6982 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
6983 }
6984 DialectType::PostgreSQL => {
6985 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), f.args))))
6986 }
6987 DialectType::DuckDB => {
6988 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), f.args))))
6989 }
6990 DialectType::Presto | DialectType::Trino => {
6991 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), f.args))))
6992 }
6993 _ => Ok(Expression::Function(f)),
6994 }
6995 }
6996 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
6997 "HAS" if f.args.len() == 2 => {
6998 match target {
6999 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7000 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
7001 }
7002 DialectType::Presto | DialectType::Trino => {
7003 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
7004 }
7005 _ => Ok(Expression::Function(f)),
7006 }
7007 }
        // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
        // Only the >2-argument form is rewritten here; the common two-argument
        // NVL(a, b) is deliberately left for other rules / native support.
        "NVL" if f.args.len() > 2 => {
            Ok(Expression::Function(Box::new(Function::new("COALESCE".to_string(), f.args))))
        }
7012 // ISNULL(x) in MySQL -> (x IS NULL)
7013 "ISNULL" if f.args.len() == 1 && matches!(source, DialectType::MySQL) && matches!(target, DialectType::MySQL) => {
7014 let arg = f.args.into_iter().next().unwrap();
7015 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
7016 this: Expression::IsNull(Box::new(crate::expressions::IsNull {
7017 this: arg,
7018 not: false,
7019 postfix_form: false,
7020 })),
7021 trailing_comments: Vec::new(),
7022 })))
7023 }
7024 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
7025 "MONTHNAME" if f.args.len() == 1 && matches!(target, DialectType::MySQL) => {
7026 let arg = f.args.into_iter().next().unwrap();
7027 Ok(Expression::Function(Box::new(Function::new(
7028 "DATE_FORMAT".to_string(),
7029 vec![arg, Expression::string("%M")],
7030 ))))
7031 }
7032 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
7033 "SPLITBYSTRING" if f.args.len() == 2 => {
7034 let sep = f.args[0].clone();
7035 let str_arg = f.args[1].clone();
7036 match target {
7037 DialectType::DuckDB => {
7038 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]))))
7039 }
7040 DialectType::Doris => {
7041 Ok(Expression::Function(Box::new(Function::new("SPLIT_BY_STRING".to_string(), vec![str_arg, sep]))))
7042 }
7043 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7044 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
7045 let escaped = Expression::Function(Box::new(Function::new(
7046 "CONCAT".to_string(),
7047 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
7048 )));
7049 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, escaped]))))
7050 }
7051 _ => Ok(Expression::Function(f)),
7052 }
7053 }
7054 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
7055 "SPLITBYREGEXP" if f.args.len() == 2 => {
7056 let sep = f.args[0].clone();
7057 let str_arg = f.args[1].clone();
7058 match target {
7059 DialectType::DuckDB => {
7060 Ok(Expression::Function(Box::new(Function::new("STR_SPLIT_REGEX".to_string(), vec![str_arg, sep]))))
7061 }
7062 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
7063 Ok(Expression::Function(Box::new(Function::new("SPLIT".to_string(), vec![str_arg, sep]))))
7064 }
7065 _ => Ok(Expression::Function(f)),
7066 }
7067 }
7068 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
7069 "TOMONDAY" => {
7070 if f.args.len() == 1 {
7071 let arg = f.args.into_iter().next().unwrap();
7072 match target {
7073 DialectType::Doris => {
7074 Ok(Expression::Function(Box::new(Function::new(
7075 "DATE_TRUNC".to_string(),
7076 vec![arg, Expression::string("WEEK")],
7077 ))))
7078 }
7079 _ => {
7080 Ok(Expression::Function(Box::new(Function::new(
7081 "DATE_TRUNC".to_string(),
7082 vec![Expression::string("WEEK"), arg],
7083 ))))
7084 }
7085 }
7086 } else {
7087 Ok(Expression::Function(f))
7088 }
7089 }
7090 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
7091 "COLLECT_LIST" if f.args.len() == 1 => {
7092 match target {
7093 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
7094 Ok(Expression::Function(f))
7095 }
7096 _ => {
7097 Ok(Expression::Function(Box::new(Function::new("ARRAY_AGG".to_string(), f.args))))
7098 }
7099 }
7100 }
7101 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
7102 "TO_CHAR" if f.args.len() == 1 && matches!(target, DialectType::Doris) => {
7103 let arg = f.args.into_iter().next().unwrap();
7104 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7105 this: arg,
7106 to: DataType::Custom { name: "STRING".to_string() },
7107 double_colon_syntax: false,
7108 trailing_comments: Vec::new(),
7109 format: None,
7110 default: None,
7111 })))
7112 }
7113 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
7114 "DBMS_RANDOM.VALUE" if f.args.is_empty() => {
7115 match target {
7116 DialectType::PostgreSQL => {
7117 Ok(Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![]))))
7118 }
7119 _ => Ok(Expression::Function(f)),
7120 }
7121 }
7122 // ClickHouse formatDateTime -> target-specific
7123 "FORMATDATETIME" if f.args.len() >= 2 => {
7124 match target {
7125 DialectType::MySQL => {
7126 Ok(Expression::Function(Box::new(Function::new("DATE_FORMAT".to_string(), f.args))))
7127 }
7128 _ => Ok(Expression::Function(f)),
7129 }
7130 }
7131 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
7132 "REPLICATE" if f.args.len() == 2 => {
7133 match target {
7134 DialectType::TSQL => Ok(Expression::Function(f)),
7135 _ => {
7136 Ok(Expression::Function(Box::new(Function::new("REPEAT".to_string(), f.args))))
7137 }
7138 }
7139 }
7140 // LEN(x) -> LENGTH(x) for non-TSQL targets
7141 // No CAST needed when arg is already a string literal
7142 "LEN" if f.args.len() == 1 => {
7143 match target {
7144 DialectType::TSQL => Ok(Expression::Function(f)),
7145 DialectType::Spark | DialectType::Databricks => {
7146 let arg = f.args.into_iter().next().unwrap();
7147 // Don't wrap string literals with CAST - they're already strings
7148 let is_string = matches!(&arg, Expression::Literal(crate::expressions::Literal::String(_)));
7149 let final_arg = if is_string {
7150 arg
7151 } else {
7152 Expression::Cast(Box::new(Cast {
7153 this: arg,
7154 to: DataType::VarChar { length: None, parenthesized_length: false },
7155 double_colon_syntax: false,
7156 trailing_comments: Vec::new(),
7157 format: None,
7158 default: None,
7159 }))
7160 };
7161 Ok(Expression::Function(Box::new(Function::new(
7162 "LENGTH".to_string(),
7163 vec![final_arg],
7164 ))))
7165 }
7166 _ => {
7167 let arg = f.args.into_iter().next().unwrap();
7168 Ok(Expression::Function(Box::new(Function::new(
7169 "LENGTH".to_string(),
7170 vec![arg],
7171 ))))
7172 }
7173 }
7174 }
7175 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
7176 "COUNT_BIG" if f.args.len() == 1 => {
7177 match target {
7178 DialectType::TSQL => Ok(Expression::Function(f)),
7179 _ => {
7180 Ok(Expression::Function(Box::new(Function::new("COUNT".to_string(), f.args))))
7181 }
7182 }
7183 }
7184 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
7185 "DATEFROMPARTS" if f.args.len() == 3 => {
7186 match target {
7187 DialectType::TSQL => Ok(Expression::Function(f)),
7188 _ => {
7189 Ok(Expression::Function(Box::new(Function::new("MAKE_DATE".to_string(), f.args))))
7190 }
7191 }
7192 }
7193 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
7194 "REGEXP_LIKE" if f.args.len() >= 2 => {
7195 let str_expr = f.args[0].clone();
7196 let pattern = f.args[1].clone();
7197 let flags = if f.args.len() >= 3 { Some(f.args[2].clone()) } else { None };
7198 match target {
7199 DialectType::DuckDB => {
7200 let mut new_args = vec![str_expr, pattern];
7201 if let Some(fl) = flags {
7202 new_args.push(fl);
7203 }
7204 Ok(Expression::Function(Box::new(Function::new(
7205 "REGEXP_MATCHES".to_string(),
7206 new_args,
7207 ))))
7208 }
7209 _ => {
7210 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
7211 this: str_expr,
7212 pattern,
7213 flags,
7214 })))
7215 }
7216 }
7217 }
7218 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
7219 "ARRAYJOIN" if f.args.len() == 1 => {
7220 match target {
7221 DialectType::PostgreSQL => {
7222 Ok(Expression::Function(Box::new(Function::new("UNNEST".to_string(), f.args))))
7223 }
7224 _ => Ok(Expression::Function(f)),
7225 }
7226 }
            // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
            "DATETIMEFROMPARTS" if f.args.len() == 7 => {
                match target {
                    // TSQL owns this function — keep it verbatim.
                    DialectType::TSQL => Ok(Expression::Function(f)),
                    DialectType::DuckDB => {
                        // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
                        // DuckDB's MAKE_TIMESTAMP has no millisecond slot, so the
                        // trailing ms argument is popped off and folded into the
                        // seconds argument as a fractional part.
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        let s = args.pop().unwrap();
                        // s + (ms / 1000.0)
                        let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                            ms,
                            Expression::Literal(crate::expressions::Literal::Number("1000.0".to_string())),
                        )));
                        let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                            s,
                            // Parenthesized so the rendered SQL reads s + (ms / 1000.0).
                            Expression::Paren(Box::new(Paren { this: ms_frac, trailing_comments: vec![] })),
                        )));
                        args.push(s_with_ms);
                        Ok(Expression::Function(Box::new(Function::new("MAKE_TIMESTAMP".to_string(), args))))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
                        // Snowflake's last slot is NANOseconds, hence the scaling.
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        // ms * 1000000
                        let ns = Expression::Mul(Box::new(BinaryOp::new(
                            ms,
                            Expression::number(1000000),
                        )));
                        args.push(ns);
                        Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
                    }
                    _ => {
                        // Default: keep function name for other targets
                        Ok(Expression::Function(Box::new(Function::new("DATETIMEFROMPARTS".to_string(), f.args))))
                    }
                }
            }
7266 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
7267 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
7268 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
7269 let is_try = name == "TRY_CONVERT";
7270 let type_expr = f.args[0].clone();
7271 let value_expr = f.args[1].clone();
7272 let style = if f.args.len() >= 3 { Some(&f.args[2]) } else { None };
7273
7274 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
7275 if matches!(target, DialectType::TSQL) {
7276 let normalized_type = match &type_expr {
7277 Expression::DataType(dt) => {
7278 let new_dt = match dt {
7279 DataType::Int { .. } => DataType::Custom { name: "INTEGER".to_string() },
7280 _ => dt.clone(),
7281 };
7282 Expression::DataType(new_dt)
7283 }
7284 Expression::Identifier(id) => {
7285 let upper = id.name.to_uppercase();
7286 let normalized = match upper.as_str() {
7287 "INT" => "INTEGER",
7288 _ => &upper,
7289 };
7290 Expression::Identifier(crate::expressions::Identifier::new(normalized))
7291 }
7292 Expression::Column(col) => {
7293 let upper = col.name.name.to_uppercase();
7294 let normalized = match upper.as_str() {
7295 "INT" => "INTEGER",
7296 _ => &upper,
7297 };
7298 Expression::Identifier(crate::expressions::Identifier::new(normalized))
7299 }
7300 _ => type_expr.clone(),
7301 };
7302 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
7303 let mut new_args = vec![normalized_type, value_expr];
7304 if let Some(s) = style {
7305 new_args.push(s.clone());
7306 }
7307 return Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), new_args))));
7308 }
7309
7310 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
7311 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
7312 match e {
7313 Expression::DataType(dt) => {
7314 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
7315 match dt {
7316 DataType::Custom { name } if name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(") => {
7317 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
7318 let inner = &name[name.find('(').unwrap()+1..name.len()-1];
7319 if inner.eq_ignore_ascii_case("MAX") {
7320 Some(DataType::Text)
7321 } else if let Ok(len) = inner.parse::<u32>() {
7322 if name.starts_with("NCHAR") {
7323 Some(DataType::Char { length: Some(len) })
7324 } else {
7325 Some(DataType::VarChar { length: Some(len), parenthesized_length: false })
7326 }
7327 } else {
7328 Some(dt.clone())
7329 }
7330 }
7331 DataType::Custom { name } if name == "NVARCHAR" => {
7332 Some(DataType::VarChar { length: None, parenthesized_length: false })
7333 }
7334 DataType::Custom { name } if name == "NCHAR" => {
7335 Some(DataType::Char { length: None })
7336 }
7337 DataType::Custom { name } if name == "NVARCHAR(MAX)" || name == "VARCHAR(MAX)" => {
7338 Some(DataType::Text)
7339 }
7340 _ => Some(dt.clone()),
7341 }
7342 }
7343 Expression::Identifier(id) => {
7344 let name = id.name.to_uppercase();
7345 match name.as_str() {
7346 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7347 "BIGINT" => Some(DataType::BigInt { length: None }),
7348 "SMALLINT" => Some(DataType::SmallInt { length: None }),
7349 "TINYINT" => Some(DataType::TinyInt { length: None }),
7350 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7351 "REAL" => Some(DataType::Float { precision: None, scale: None, real_spelling: true }),
7352 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7353 "DATE" => Some(DataType::Date),
7354 "BIT" => Some(DataType::Boolean),
7355 "TEXT" => Some(DataType::Text),
7356 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7357 "MONEY" => Some(DataType::Decimal { precision: Some(15), scale: Some(4) }),
7358 "SMALLMONEY" => Some(DataType::Decimal { precision: Some(6), scale: Some(4) }),
7359 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7360 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7361 "CHAR" => Some(DataType::Char { length: None }),
7362 "NCHAR" => Some(DataType::Char { length: None }),
7363 _ => Some(DataType::Custom { name }),
7364 }
7365 }
7366 Expression::Column(col) => {
7367 let name = col.name.name.to_uppercase();
7368 match name.as_str() {
7369 "INT" | "INTEGER" => Some(DataType::Int { length: None, integer_spelling: false }),
7370 "BIGINT" => Some(DataType::BigInt { length: None }),
7371 "FLOAT" => Some(DataType::Float { precision: None, scale: None, real_spelling: false }),
7372 "DATETIME" | "DATETIME2" => Some(DataType::Timestamp { timezone: false, precision: None }),
7373 "DATE" => Some(DataType::Date),
7374 "NUMERIC" => Some(DataType::Decimal { precision: None, scale: None }),
7375 "VARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7376 "NVARCHAR" => Some(DataType::VarChar { length: None, parenthesized_length: false }),
7377 "CHAR" => Some(DataType::Char { length: None }),
7378 "NCHAR" => Some(DataType::Char { length: None }),
7379 _ => Some(DataType::Custom { name }),
7380 }
7381 }
7382 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
7383 Expression::Function(f) => {
7384 let fname = f.name.to_uppercase();
7385 match fname.as_str() {
7386 "VARCHAR" | "NVARCHAR" => {
7387 let len = f.args.first().and_then(|a| {
7388 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7389 n.parse::<u32>().ok()
7390 } else if let Expression::Identifier(id) = a {
7391 if id.name.eq_ignore_ascii_case("MAX") { None } else { None }
7392 } else { None }
7393 });
7394 // Check for VARCHAR(MAX) -> TEXT
7395 let is_max = f.args.first().map_or(false, |a| {
7396 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
7397 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
7398 });
7399 if is_max {
7400 Some(DataType::Text)
7401 } else {
7402 Some(DataType::VarChar { length: len, parenthesized_length: false })
7403 }
7404 }
7405 "NCHAR" | "CHAR" => {
7406 let len = f.args.first().and_then(|a| {
7407 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7408 n.parse::<u32>().ok()
7409 } else { None }
7410 });
7411 Some(DataType::Char { length: len })
7412 }
7413 "NUMERIC" | "DECIMAL" => {
7414 let precision = f.args.first().and_then(|a| {
7415 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7416 n.parse::<u32>().ok()
7417 } else { None }
7418 });
7419 let scale = f.args.get(1).and_then(|a| {
7420 if let Expression::Literal(crate::expressions::Literal::Number(n)) = a {
7421 n.parse::<u32>().ok()
7422 } else { None }
7423 });
7424 Some(DataType::Decimal { precision, scale })
7425 }
7426 _ => None,
7427 }
7428 }
7429 _ => None,
7430 }
7431 }
7432
7433 if let Some(mut dt) = expr_to_datatype(&type_expr) {
7434 // For TSQL source: VARCHAR/CHAR without length defaults to 30
7435 let is_tsql_source = matches!(source, DialectType::TSQL | DialectType::Fabric);
7436 if is_tsql_source {
7437 match &dt {
7438 DataType::VarChar { length: None, .. } => {
7439 dt = DataType::VarChar { length: Some(30), parenthesized_length: false };
7440 }
7441 DataType::Char { length: None } => {
7442 dt = DataType::Char { length: Some(30) };
7443 }
7444 _ => {}
7445 }
7446 }
7447
7448 // Determine if this is a string type
7449 let is_string_type = matches!(dt, DataType::VarChar { .. } | DataType::Char { .. } | DataType::Text)
7450 || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
7451 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
7452 || name.starts_with("VARCHAR(") || name == "VARCHAR"
7453 || name == "STRING");
7454
7455 // Determine if this is a date/time type
7456 let is_datetime_type = matches!(dt, DataType::Timestamp { .. } | DataType::Date)
7457 || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
7458 || name == "DATETIME2" || name == "SMALLDATETIME");
7459
7460 // Check for date conversion with style
7461 if style.is_some() {
7462 let style_num = style.and_then(|s| {
7463 if let Expression::Literal(crate::expressions::Literal::Number(n)) = s {
7464 n.parse::<u32>().ok()
7465 } else { None }
7466 });
7467
7468 // TSQL CONVERT date styles (Java format)
7469 let format_str = style_num.and_then(|n| match n {
7470 101 => Some("MM/dd/yyyy"),
7471 102 => Some("yyyy.MM.dd"),
7472 103 => Some("dd/MM/yyyy"),
7473 104 => Some("dd.MM.yyyy"),
7474 105 => Some("dd-MM-yyyy"),
7475 108 => Some("HH:mm:ss"),
7476 110 => Some("MM-dd-yyyy"),
7477 112 => Some("yyyyMMdd"),
7478 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
7479 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
7480 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
7481 _ => None,
7482 });
7483
7484 // Non-string, non-datetime types with style: just CAST, ignore the style
7485 if !is_string_type && !is_datetime_type {
7486 let cast_expr = if is_try {
7487 Expression::TryCast(Box::new(crate::expressions::Cast {
7488 this: value_expr,
7489 to: dt,
7490 trailing_comments: Vec::new(),
7491 double_colon_syntax: false,
7492 format: None,
7493 default: None,
7494 }))
7495 } else {
7496 Expression::Cast(Box::new(crate::expressions::Cast {
7497 this: value_expr,
7498 to: dt,
7499 trailing_comments: Vec::new(),
7500 double_colon_syntax: false,
7501 format: None,
7502 default: None,
7503 }))
7504 };
7505 return Ok(cast_expr);
7506 }
7507
7508 if let Some(java_fmt) = format_str {
7509 let c_fmt = java_fmt
7510 .replace("yyyy", "%Y")
7511 .replace("MM", "%m")
7512 .replace("dd", "%d")
7513 .replace("HH", "%H")
7514 .replace("mm", "%M")
7515 .replace("ss", "%S")
7516 .replace("SSSSSS", "%f")
7517 .replace("SSS", "%f")
7518 .replace("'T'", "T");
7519
7520 // For datetime target types: style is the INPUT format for parsing strings -> dates
7521 if is_datetime_type {
7522 match target {
7523 DialectType::DuckDB => {
7524 return Ok(Expression::Function(Box::new(Function::new(
7525 "STRPTIME".to_string(),
7526 vec![value_expr, Expression::string(&c_fmt)],
7527 ))));
7528 }
7529 DialectType::Spark | DialectType::Databricks => {
7530 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
7531 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
7532 let func_name = if matches!(dt, DataType::Date) {
7533 "TO_DATE"
7534 } else {
7535 "TO_TIMESTAMP"
7536 };
7537 return Ok(Expression::Function(Box::new(Function::new(
7538 func_name.to_string(),
7539 vec![value_expr, Expression::string(java_fmt)],
7540 ))));
7541 }
7542 DialectType::Hive => {
7543 return Ok(Expression::Function(Box::new(Function::new(
7544 "TO_TIMESTAMP".to_string(),
7545 vec![value_expr, Expression::string(java_fmt)],
7546 ))));
7547 }
7548 _ => {
7549 return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7550 this: value_expr,
7551 to: dt,
7552 trailing_comments: Vec::new(),
7553 double_colon_syntax: false,
7554 format: None,
7555 default: None,
7556 })));
7557 }
7558 }
7559 }
7560
7561 // For string target types: style is the OUTPUT format for dates -> strings
7562 match target {
7563 DialectType::DuckDB => {
7564 Ok(Expression::Function(Box::new(Function::new(
7565 "STRPTIME".to_string(),
7566 vec![value_expr, Expression::string(&c_fmt)],
7567 ))))
7568 }
7569 DialectType::Spark | DialectType::Databricks => {
7570 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
7571 // Determine the target string type
7572 let string_dt = match &dt {
7573 DataType::VarChar { length: Some(l), .. } => DataType::VarChar { length: Some(*l), parenthesized_length: false },
7574 DataType::Text => DataType::Custom { name: "STRING".to_string() },
7575 _ => DataType::Custom { name: "STRING".to_string() },
7576 };
7577 let date_format_expr = Expression::Function(Box::new(Function::new(
7578 "DATE_FORMAT".to_string(),
7579 vec![value_expr, Expression::string(java_fmt)],
7580 )));
7581 let cast_expr = if is_try {
7582 Expression::TryCast(Box::new(crate::expressions::Cast {
7583 this: date_format_expr,
7584 to: string_dt,
7585 trailing_comments: Vec::new(),
7586 double_colon_syntax: false,
7587 format: None,
7588 default: None,
7589 }))
7590 } else {
7591 Expression::Cast(Box::new(crate::expressions::Cast {
7592 this: date_format_expr,
7593 to: string_dt,
7594 trailing_comments: Vec::new(),
7595 double_colon_syntax: false,
7596 format: None,
7597 default: None,
7598 }))
7599 };
7600 Ok(cast_expr)
7601 }
7602 DialectType::MySQL | DialectType::SingleStore => {
7603 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
7604 let mysql_fmt = java_fmt
7605 .replace("yyyy", "%Y")
7606 .replace("MM", "%m")
7607 .replace("dd", "%d")
7608 .replace("HH:mm:ss.SSSSSS", "%T")
7609 .replace("HH:mm:ss", "%T")
7610 .replace("HH", "%H")
7611 .replace("mm", "%i")
7612 .replace("ss", "%S");
7613 let date_format_expr = Expression::Function(Box::new(Function::new(
7614 "DATE_FORMAT".to_string(),
7615 vec![value_expr, Expression::string(&mysql_fmt)],
7616 )));
7617 // MySQL uses CHAR for string casts
7618 let mysql_dt = match &dt {
7619 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7620 _ => dt,
7621 };
7622 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7623 this: date_format_expr,
7624 to: mysql_dt,
7625 trailing_comments: Vec::new(),
7626 double_colon_syntax: false,
7627 format: None,
7628 default: None,
7629 })))
7630 }
7631 DialectType::Hive => {
7632 let func_name = "TO_TIMESTAMP";
7633 Ok(Expression::Function(Box::new(Function::new(
7634 func_name.to_string(),
7635 vec![value_expr, Expression::string(java_fmt)],
7636 ))))
7637 }
7638 _ => {
7639 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
7640 this: value_expr,
7641 to: dt,
7642 trailing_comments: Vec::new(),
7643 double_colon_syntax: false,
7644 format: None,
7645 default: None,
7646 })))
7647 }
7648 }
7649 } else {
7650 // Unknown style, just CAST
7651 let cast_expr = if is_try {
7652 Expression::TryCast(Box::new(crate::expressions::Cast {
7653 this: value_expr,
7654 to: dt,
7655 trailing_comments: Vec::new(),
7656 double_colon_syntax: false,
7657 format: None,
7658 default: None,
7659 }))
7660 } else {
7661 Expression::Cast(Box::new(crate::expressions::Cast {
7662 this: value_expr,
7663 to: dt,
7664 trailing_comments: Vec::new(),
7665 double_colon_syntax: false,
7666 format: None,
7667 default: None,
7668 }))
7669 };
7670 Ok(cast_expr)
7671 }
7672 } else {
7673 // No style - simple CAST
7674 let final_dt = if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7675 match &dt {
7676 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => {
7677 DataType::Custom { name: "SIGNED".to_string() }
7678 }
7679 DataType::VarChar { length, .. } => DataType::Char { length: *length },
7680 _ => dt,
7681 }
7682 } else {
7683 dt
7684 };
7685 let cast_expr = if is_try {
7686 Expression::TryCast(Box::new(crate::expressions::Cast {
7687 this: value_expr,
7688 to: final_dt,
7689 trailing_comments: Vec::new(),
7690 double_colon_syntax: false,
7691 format: None,
7692 default: None,
7693 }))
7694 } else {
7695 Expression::Cast(Box::new(crate::expressions::Cast {
7696 this: value_expr,
7697 to: final_dt,
7698 trailing_comments: Vec::new(),
7699 double_colon_syntax: false,
7700 format: None,
7701 default: None,
7702 }))
7703 };
7704 Ok(cast_expr)
7705 }
7706 } else {
7707 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
7708 Ok(Expression::Function(f))
7709 }
7710 }
            // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
            "STRFTIME" if f.args.len() == 2 => {
                // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
                let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
                    // SQLite: args[0] = format, args[1] = value
                    (f.args[1].clone(), &f.args[0])
                } else {
                    // DuckDB and others: args[0] = value, args[1] = format
                    (f.args[0].clone(), &f.args[1])
                };

                // Helper to convert C-style format to Java-style.
                // Replacements run sequentially on the same string, so order
                // matters: e.g. "%-m" is only reached because the earlier
                // "%m" pass does not match the "%-m" byte sequence.
                fn c_to_java_format(fmt: &str) -> String {
                    fmt.replace("%Y", "yyyy")
                        .replace("%m", "MM")
                        .replace("%d", "dd")
                        .replace("%H", "HH")
                        .replace("%M", "mm")
                        .replace("%S", "ss")
                        .replace("%f", "SSSSSS")
                        .replace("%y", "yy")
                        .replace("%-m", "M")
                        .replace("%-d", "d")
                        .replace("%-H", "H")
                        .replace("%-I", "h")
                        .replace("%I", "hh")
                        .replace("%p", "a")
                        .replace("%j", "DDD")
                        .replace("%a", "EEE")
                        .replace("%b", "MMM")
                        .replace("%F", "yyyy-MM-dd")
                        .replace("%T", "HH:mm:ss")
                }

                // Helper: recursively convert format strings within expressions (handles CONCAT)
                fn convert_fmt_expr(expr: &Expression, converter: &dyn Fn(&str) -> String) -> Expression {
                    match expr {
                        Expression::Literal(crate::expressions::Literal::String(s)) => {
                            Expression::string(&converter(s))
                        }
                        Expression::Function(func) if func.name.eq_ignore_ascii_case("CONCAT") => {
                            let new_args: Vec<Expression> = func.args.iter()
                                .map(|a| convert_fmt_expr(a, converter))
                                .collect();
                            Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args)))
                        }
                        // Non-literal, non-CONCAT formats are passed through unchanged.
                        other => other.clone(),
                    }
                }

                match target {
                    DialectType::DuckDB => {
                        if matches!(source, DialectType::SQLite) {
                            // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
                            // The cast compensates for SQLite's untyped date values.
                            let cast_val = Expression::Cast(Box::new(Cast {
                                this: val,
                                to: crate::expressions::DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![cast_val, fmt_expr.clone()],
                            ))))
                        } else {
                            // DuckDB -> DuckDB: nothing to do.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Spark | DialectType::Databricks
                    | DialectType::Hive => {
                        // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
                        let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, converted_fmt],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
                        let converted_fmt = convert_fmt_expr(fmt_expr, &c_to_java_format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(),
                            vec![val, converted_fmt],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let presto_fmt = duckdb_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&presto_fmt)],
                            ))))
                        } else {
                            // Dynamic format expression: rename only, no token mapping.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = duckdb_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
                        // Sequential replaces into Postgres TO_CHAR tokens; FM..
                        // prefixes suppress zero-padding for the "%-x" variants.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let pg_fmt = s
                                .replace("%Y", "YYYY")
                                .replace("%m", "MM")
                                .replace("%d", "DD")
                                .replace("%H", "HH24")
                                .replace("%M", "MI")
                                .replace("%S", "SS")
                                .replace("%y", "YY")
                                .replace("%-m", "FMMM")
                                .replace("%-d", "FMDD")
                                .replace("%-H", "FMHH24")
                                .replace("%-I", "FMHH12")
                                .replace("%p", "AM")
                                .replace("%F", "YYYY-MM-DD")
                                .replace("%T", "HH24:MI:SS");
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, Expression::string(&pg_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    // All other targets keep STRFTIME unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
            "STRPTIME" if f.args.len() == 2 => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];

                // C/strftime-style -> Java-style tokens for the parse direction.
                // Sequential replaces; ordering keeps "%-x" variants from being
                // consumed by the plain "%x" passes.
                fn c_to_java_format_parse(fmt: &str) -> String {
                    fmt.replace("%Y", "yyyy")
                        .replace("%m", "MM")
                        .replace("%d", "dd")
                        .replace("%H", "HH")
                        .replace("%M", "mm")
                        .replace("%S", "ss")
                        .replace("%f", "SSSSSS")
                        .replace("%y", "yy")
                        .replace("%-m", "M")
                        .replace("%-d", "d")
                        .replace("%-H", "H")
                        .replace("%-I", "h")
                        .replace("%I", "hh")
                        .replace("%p", "a")
                        .replace("%F", "yyyy-MM-dd")
                        .replace("%T", "HH:mm:ss")
                }

                match target {
                    // DuckDB -> DuckDB: nothing to do.
                    DialectType::DuckDB => Ok(Expression::Function(f)),
                    DialectType::Spark | DialectType::Databricks => {
                        // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = c_to_java_format_parse(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        } else {
                            // Dynamic format: rename only, leave the format expression alone.
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::Hive => {
                        // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
                        // Hive has no direct formatted parse to TIMESTAMP, hence the
                        // round trip through epoch seconds.
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = c_to_java_format_parse(s);
                            let unix_ts = Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            )));
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![unix_ts],
                            )));
                            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                this: from_unix,
                                to: DataType::Timestamp { timezone: false, precision: None },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        } else {
                            // Non-literal format: cannot translate; keep STRPTIME.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let presto_fmt = duckdb_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![val, Expression::string(&presto_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = duckdb_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "PARSE_TIMESTAMP".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "PARSE_TIMESTAMP".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    // All other targets keep STRPTIME unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
            "DATE_FORMAT" if f.args.len() >= 2
                && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
                let val = f.args[0].clone();
                let fmt_expr = &f.args[1];

                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto -> Presto: normalize format (e.g. %H:%i:%S -> %T)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&normalized)],
                            ))))
                        } else {
                            // Non-literal format: nothing to normalize.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                        // Convert Presto C-style to Java-style format
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        } else {
                            // Non-literal format: keep the call; format cannot be mapped.
                            Ok(Expression::Function(f))
                        }
                    }
                    DialectType::DuckDB => {
                        // Convert to STRFTIME(val, duckdb_fmt)
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![val, Expression::string(&duckdb_fmt)],
                            ))))
                        } else {
                            // Dynamic format: rename only.
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![val, fmt_expr.clone()],
                            ))))
                        }
                    }
                    DialectType::BigQuery => {
                        // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
                            let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![Expression::string(&bq_fmt), val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT_DATE".to_string(),
                                vec![fmt_expr.clone(), val],
                            ))))
                        }
                    }
                    // All other targets keep DATE_FORMAT as-is.
                    _ => Ok(Expression::Function(f)),
                }
            }
8022 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
8023 "DATE_PARSE" if f.args.len() >= 2
8024 && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
8025 let val = f.args[0].clone();
8026 let fmt_expr = &f.args[1];
8027
8028 match target {
8029 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8030 // Presto -> Presto: normalize format
8031 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8032 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
8033 Ok(Expression::Function(Box::new(Function::new(
8034 "DATE_PARSE".to_string(),
8035 vec![val, Expression::string(&normalized)],
8036 ))))
8037 } else {
8038 Ok(Expression::Function(f))
8039 }
8040 }
8041 DialectType::Hive => {
8042 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
8043 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8044 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
8045 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
8046 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8047 this: val,
8048 to: DataType::Timestamp { timezone: false, precision: None },
8049 trailing_comments: Vec::new(),
8050 double_colon_syntax: false,
8051 format: None,
8052 default: None,
8053 })))
8054 } else {
8055 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
8056 Ok(Expression::Function(Box::new(Function::new(
8057 "TO_TIMESTAMP".to_string(),
8058 vec![val, Expression::string(&java_fmt)],
8059 ))))
8060 }
8061 } else {
8062 Ok(Expression::Function(f))
8063 }
8064 }
8065 DialectType::Spark | DialectType::Databricks => {
8066 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
8067 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8068 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
8069 Ok(Expression::Function(Box::new(Function::new(
8070 "TO_TIMESTAMP".to_string(),
8071 vec![val, Expression::string(&java_fmt)],
8072 ))))
8073 } else {
8074 Ok(Expression::Function(f))
8075 }
8076 }
8077 DialectType::DuckDB => {
8078 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
8079 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8080 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
8081 Ok(Expression::Function(Box::new(Function::new(
8082 "STRPTIME".to_string(),
8083 vec![val, Expression::string(&duckdb_fmt)],
8084 ))))
8085 } else {
8086 Ok(Expression::Function(Box::new(Function::new(
8087 "STRPTIME".to_string(),
8088 vec![val, fmt_expr.clone()],
8089 ))))
8090 }
8091 }
8092 _ => Ok(Expression::Function(f)),
8093 }
8094 }
8095 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
8096 "FROM_BASE64" if f.args.len() == 1
8097 && matches!(target, DialectType::Hive) => {
8098 Ok(Expression::Function(Box::new(Function::new("UNBASE64".to_string(), f.args))))
8099 }
8100 "TO_BASE64" if f.args.len() == 1
8101 && matches!(target, DialectType::Hive) => {
8102 Ok(Expression::Function(Box::new(Function::new("BASE64".to_string(), f.args))))
8103 }
8104 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
8105 "FROM_UNIXTIME" if f.args.len() == 1
8106 && matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena)
8107 && matches!(target, DialectType::Spark | DialectType::Databricks) => {
8108 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
8109 let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), f.args)));
8110 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8111 this: from_unix,
8112 to: DataType::Timestamp { timezone: false, precision: None },
8113 trailing_comments: Vec::new(),
8114 double_colon_syntax: false,
8115 format: None,
8116 default: None,
8117 })))
8118 }
8119 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
8120 "DATE_FORMAT" if f.args.len() >= 2
8121 && !matches!(target, DialectType::Hive | DialectType::Spark
8122 | DialectType::Databricks
8123 | DialectType::MySQL | DialectType::SingleStore) => {
8124 let val = f.args[0].clone();
8125 let fmt_expr = &f.args[1];
8126 let is_hive_source = matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks);
8127
fn java_to_c_format(fmt: &str) -> String {
    // Convert a Java SimpleDateFormat-style pattern into a C/MySQL-style
    // strftime pattern (e.g. "yyyy-MM-dd HH:mm:ss" -> "%Y-%m-%d %H:%M:%S").
    //
    // Implemented as a single left-to-right, longest-match-first scan.
    // This avoids the corruption a chain of global `str::replace` calls can
    // cause when an already-emitted specifier forms a new match with an
    // adjacent input character (e.g. "MMm": replace-chain yields "%%M",
    // while the correct result is "%mm").
    //
    // Mapping table, ordered so longer patterns win over their prefixes
    // (e.g. "yyyy" before "yy").
    // NOTE(review): "EEEE" -> "%W" matches MySQL/Presto conventions where
    // %W is the full weekday name; confirm for targets using C strftime,
    // where the weekday name is %A.
    const PATTERNS: [(&str, &str); 11] = [
        ("SSSSSS", "%f"), // microseconds
        ("yyyy", "%Y"),
        ("EEEE", "%W"), // full weekday name (MySQL-style)
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
        ("z", "%Z"), // timezone name
        ("Z", "%z"), // timezone offset
    ];

    let mut out = String::with_capacity(fmt.len() + 8);
    let mut rest = fmt;
    while !rest.is_empty() {
        // '%' already introduces a format specifier: copy it together with
        // the following character verbatim so it is never re-interpreted.
        if let Some(tail) = rest.strip_prefix('%') {
            out.push('%');
            let mut chars = tail.chars();
            if let Some(c) = chars.next() {
                out.push(c);
                rest = chars.as_str();
            } else {
                rest = tail;
            }
            continue;
        }
        if let Some(&(pat, repl)) = PATTERNS.iter().find(|(p, _)| rest.starts_with(*p)) {
            out.push_str(repl);
            rest = &rest[pat.len()..];
        } else {
            // No pattern starts here: copy one character through unchanged.
            let mut chars = rest.chars();
            out.push(chars.next().expect("rest is non-empty"));
            rest = chars.as_str();
        }
    }
    out
}
8167
8168 fn java_to_presto_format(fmt: &str) -> String {
8169 // Presto uses %T for HH:MM:SS
8170 let c_fmt = java_to_c_format(fmt);
8171 c_fmt.replace("%H:%M:%S", "%T")
8172 }
8173
8174 fn java_to_bq_format(fmt: &str) -> String {
8175 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
8176 let c_fmt = java_to_c_format(fmt);
8177 c_fmt.replace("%Y-%m-%d", "%F")
8178 .replace("%H:%M:%S", "%T")
8179 }
8180
8181 // For Hive source, CAST string literals to appropriate type
8182 let cast_val = if is_hive_source {
8183 match &val {
8184 Expression::Literal(crate::expressions::Literal::String(_)) => {
8185 match target {
8186 DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8187 Self::ensure_cast_timestamp(val.clone())
8188 }
8189 DialectType::BigQuery => {
8190 // BigQuery: CAST(val AS DATETIME)
8191 Expression::Cast(Box::new(crate::expressions::Cast {
8192 this: val.clone(),
8193 to: DataType::Custom { name: "DATETIME".to_string() },
8194 trailing_comments: vec![],
8195 double_colon_syntax: false,
8196 format: None,
8197 default: None,
8198 }))
8199 }
8200 _ => val.clone(),
8201 }
8202 }
8203 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
8204 Expression::Cast(c) if matches!(c.to, DataType::Date) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
8205 Expression::Cast(Box::new(crate::expressions::Cast {
8206 this: val.clone(),
8207 to: DataType::Timestamp { timezone: false, precision: None },
8208 trailing_comments: vec![],
8209 double_colon_syntax: false,
8210 format: None,
8211 default: None,
8212 }))
8213 }
8214 Expression::Literal(crate::expressions::Literal::Date(_)) if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) => {
8215 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
8216 let cast_date = Self::date_literal_to_cast(val.clone());
8217 Expression::Cast(Box::new(crate::expressions::Cast {
8218 this: cast_date,
8219 to: DataType::Timestamp { timezone: false, precision: None },
8220 trailing_comments: vec![],
8221 double_colon_syntax: false,
8222 format: None,
8223 default: None,
8224 }))
8225 }
8226 _ => val.clone(),
8227 }
8228 } else {
8229 val.clone()
8230 };
8231
8232 match target {
8233 DialectType::DuckDB => {
8234 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8235 let c_fmt = if is_hive_source {
8236 java_to_c_format(s)
8237 } else { s.clone() };
8238 Ok(Expression::Function(Box::new(Function::new(
8239 "STRFTIME".to_string(),
8240 vec![cast_val, Expression::string(&c_fmt)],
8241 ))))
8242 } else {
8243 Ok(Expression::Function(Box::new(Function::new(
8244 "STRFTIME".to_string(),
8245 vec![cast_val, fmt_expr.clone()],
8246 ))))
8247 }
8248 }
8249 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8250 if is_hive_source {
8251 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8252 let p_fmt = java_to_presto_format(s);
8253 Ok(Expression::Function(Box::new(Function::new(
8254 "DATE_FORMAT".to_string(),
8255 vec![cast_val, Expression::string(&p_fmt)],
8256 ))))
8257 } else {
8258 Ok(Expression::Function(Box::new(Function::new(
8259 "DATE_FORMAT".to_string(),
8260 vec![cast_val, fmt_expr.clone()],
8261 ))))
8262 }
8263 } else {
8264 Ok(Expression::Function(Box::new(Function::new(
8265 "DATE_FORMAT".to_string(),
8266 f.args,
8267 ))))
8268 }
8269 }
8270 DialectType::BigQuery => {
8271 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
8272 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8273 let bq_fmt = if is_hive_source {
8274 java_to_bq_format(s)
8275 } else {
8276 java_to_c_format(s)
8277 };
8278 Ok(Expression::Function(Box::new(Function::new(
8279 "FORMAT_DATE".to_string(),
8280 vec![Expression::string(&bq_fmt), cast_val],
8281 ))))
8282 } else {
8283 Ok(Expression::Function(Box::new(Function::new(
8284 "FORMAT_DATE".to_string(),
8285 vec![fmt_expr.clone(), cast_val],
8286 ))))
8287 }
8288 }
8289 DialectType::PostgreSQL | DialectType::Redshift => {
8290 if let Expression::Literal(crate::expressions::Literal::String(s)) = fmt_expr {
8291 let pg_fmt = s
8292 .replace("yyyy", "YYYY")
8293 .replace("MM", "MM")
8294 .replace("dd", "DD")
8295 .replace("HH", "HH24")
8296 .replace("mm", "MI")
8297 .replace("ss", "SS")
8298 .replace("yy", "YY");
8299 Ok(Expression::Function(Box::new(Function::new(
8300 "TO_CHAR".to_string(),
8301 vec![val, Expression::string(&pg_fmt)],
8302 ))))
8303 } else {
8304 Ok(Expression::Function(Box::new(Function::new(
8305 "TO_CHAR".to_string(),
8306 vec![val, fmt_expr.clone()],
8307 ))))
8308 }
8309 }
8310 _ => Ok(Expression::Function(f)),
8311 }
8312 }
8313 // DATEDIFF(unit, start, end) - 3-arg form
8314 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
8315 "DATEDIFF" if f.args.len() == 3 => {
8316 let mut args = f.args;
8317 // SQLite source: args = (date1, date2, unit_string)
8318 // Standard source: args = (unit, start, end)
8319 let (_arg0, arg1, arg2, unit_str) = if matches!(source, DialectType::SQLite) {
8320 let date1 = args.remove(0);
8321 let date2 = args.remove(0);
8322 let unit_expr = args.remove(0);
8323 let unit_s = Self::get_unit_str_static(&unit_expr);
8324
8325 // For SQLite target, generate JULIANDAY arithmetic directly
8326 if matches!(target, DialectType::SQLite) {
8327 let jd_first = Expression::Function(Box::new(Function::new(
8328 "JULIANDAY".to_string(), vec![date1],
8329 )));
8330 let jd_second = Expression::Function(Box::new(Function::new(
8331 "JULIANDAY".to_string(), vec![date2],
8332 )));
8333 let diff = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(jd_first, jd_second)));
8334 let paren_diff = Expression::Paren(Box::new(crate::expressions::Paren {
8335 this: diff, trailing_comments: Vec::new(),
8336 }));
8337 let adjusted = match unit_s.as_str() {
8338 "HOUR" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
8339 paren_diff, Expression::Literal(Literal::Number("24.0".to_string())),
8340 ))),
8341 "MINUTE" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
8342 paren_diff, Expression::Literal(Literal::Number("1440.0".to_string())),
8343 ))),
8344 "SECOND" => Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
8345 paren_diff, Expression::Literal(Literal::Number("86400.0".to_string())),
8346 ))),
8347 "MONTH" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
8348 paren_diff, Expression::Literal(Literal::Number("30.0".to_string())),
8349 ))),
8350 "YEAR" => Expression::Div(Box::new(crate::expressions::BinaryOp::new(
8351 paren_diff, Expression::Literal(Literal::Number("365.0".to_string())),
8352 ))),
8353 _ => paren_diff,
8354 };
8355 return Ok(Expression::Cast(Box::new(Cast {
8356 this: adjusted,
8357 to: DataType::Int { length: None, integer_spelling: true },
8358 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8359 })));
8360 }
8361
8362 // For other targets, remap to standard (unit, start, end) form
8363 let unit_ident = Expression::Identifier(Identifier::new(&unit_s));
8364 (unit_ident, date1, date2, unit_s)
8365 } else {
8366 let arg0 = args.remove(0);
8367 let arg1 = args.remove(0);
8368 let arg2 = args.remove(0);
8369 let unit_s = Self::get_unit_str_static(&arg0);
8370 (arg0, arg1, arg2, unit_s)
8371 };
8372
8373 // For Hive/Spark source, string literal dates need to be cast
8374 // Note: Databricks is excluded - it handles string args like standard SQL
8375 let is_hive_spark = matches!(source, DialectType::Hive | DialectType::Spark);
8376
8377 match target {
8378 DialectType::Snowflake => {
8379 let unit = Expression::Identifier(Identifier::new(&unit_str));
8380 // Use ensure_to_date_preserved to add TO_DATE with a marker
8381 // that prevents the Snowflake TO_DATE handler from converting it to CAST
8382 let d1 = if is_hive_spark { Self::ensure_to_date_preserved(arg1) } else { arg1 };
8383 let d2 = if is_hive_spark { Self::ensure_to_date_preserved(arg2) } else { arg2 };
8384 Ok(Expression::Function(Box::new(Function::new(
8385 "DATEDIFF".to_string(), vec![unit, d1, d2],
8386 ))))
8387 }
8388 DialectType::Redshift => {
8389 let unit = Expression::Identifier(Identifier::new(&unit_str));
8390 let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
8391 let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
8392 Ok(Expression::Function(Box::new(Function::new(
8393 "DATEDIFF".to_string(), vec![unit, d1, d2],
8394 ))))
8395 }
8396 DialectType::TSQL => {
8397 let unit = Expression::Identifier(Identifier::new(&unit_str));
8398 Ok(Expression::Function(Box::new(Function::new(
8399 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
8400 ))))
8401 }
8402 DialectType::DuckDB => {
8403 let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL);
8404 if is_hive_spark {
8405 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
8406 let d1 = Self::ensure_cast_date(arg1);
8407 let d2 = Self::ensure_cast_date(arg2);
8408 Ok(Expression::Function(Box::new(Function::new(
8409 "DATE_DIFF".to_string(), vec![
8410 Expression::string(&unit_str),
8411 d1, d2,
8412 ],
8413 ))))
8414 } else if matches!(source, DialectType::Snowflake) {
8415 // For Snowflake source: special handling per unit
8416 match unit_str.as_str() {
8417 "NANOSECOND" => {
8418 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
8419 fn cast_to_timestamp_ns(expr: Expression) -> Expression {
8420 Expression::Cast(Box::new(Cast {
8421 this: expr,
8422 to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
8423 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8424 }))
8425 }
8426 let epoch_end = Expression::Function(Box::new(Function::new(
8427 "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg2)],
8428 )));
8429 let epoch_start = Expression::Function(Box::new(Function::new(
8430 "EPOCH_NS".to_string(), vec![cast_to_timestamp_ns(arg1)],
8431 )));
8432 Ok(Expression::Sub(Box::new(BinaryOp::new(epoch_end, epoch_start))))
8433 }
8434 "WEEK" => {
8435 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
8436 let d1 = Self::force_cast_date(arg1);
8437 let d2 = Self::force_cast_date(arg2);
8438 let dt1 = Expression::Function(Box::new(Function::new(
8439 "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d1],
8440 )));
8441 let dt2 = Expression::Function(Box::new(Function::new(
8442 "DATE_TRUNC".to_string(), vec![Expression::string("WEEK"), d2],
8443 )));
8444 Ok(Expression::Function(Box::new(Function::new(
8445 "DATE_DIFF".to_string(), vec![
8446 Expression::string(&unit_str),
8447 dt1, dt2,
8448 ],
8449 ))))
8450 }
8451 _ => {
8452 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
8453 let d1 = Self::force_cast_date(arg1);
8454 let d2 = Self::force_cast_date(arg2);
8455 Ok(Expression::Function(Box::new(Function::new(
8456 "DATE_DIFF".to_string(), vec![
8457 Expression::string(&unit_str),
8458 d1, d2,
8459 ],
8460 ))))
8461 }
8462 }
8463 } else if is_redshift_tsql {
8464 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
8465 let d1 = Self::force_cast_timestamp(arg1);
8466 let d2 = Self::force_cast_timestamp(arg2);
8467 Ok(Expression::Function(Box::new(Function::new(
8468 "DATE_DIFF".to_string(), vec![
8469 Expression::string(&unit_str),
8470 d1, d2,
8471 ],
8472 ))))
8473 } else {
8474 // Keep as DATEDIFF so DuckDB's transform_datediff handles
8475 // DATE_TRUNC for WEEK, CAST for string literals, etc.
8476 let unit = Expression::Identifier(Identifier::new(&unit_str));
8477 Ok(Expression::Function(Box::new(Function::new(
8478 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
8479 ))))
8480 }
8481 }
8482 DialectType::BigQuery => {
8483 let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
8484 let cast_d1 = if is_hive_spark { Self::ensure_cast_date(arg1) }
8485 else if is_redshift_tsql { Self::force_cast_datetime(arg1) }
8486 else { Self::ensure_cast_datetime(arg1) };
8487 let cast_d2 = if is_hive_spark { Self::ensure_cast_date(arg2) }
8488 else if is_redshift_tsql { Self::force_cast_datetime(arg2) }
8489 else { Self::ensure_cast_datetime(arg2) };
8490 let unit = Expression::Identifier(Identifier::new(&unit_str));
8491 Ok(Expression::Function(Box::new(Function::new(
8492 "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
8493 ))))
8494 }
8495 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8496 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
8497 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
8498 let is_redshift_tsql = matches!(source, DialectType::Redshift | DialectType::TSQL | DialectType::Snowflake);
8499 let d1 = if is_hive_spark { Self::double_cast_timestamp_date(arg1) }
8500 else if is_redshift_tsql { Self::force_cast_timestamp(arg1) }
8501 else { arg1 };
8502 let d2 = if is_hive_spark { Self::double_cast_timestamp_date(arg2) }
8503 else if is_redshift_tsql { Self::force_cast_timestamp(arg2) }
8504 else { arg2 };
8505 Ok(Expression::Function(Box::new(Function::new(
8506 "DATE_DIFF".to_string(), vec![
8507 Expression::string(&unit_str),
8508 d1, d2,
8509 ],
8510 ))))
8511 }
8512 DialectType::Hive => {
8513 match unit_str.as_str() {
8514 "MONTH" => {
8515 Ok(Expression::Cast(Box::new(Cast {
8516 this: Expression::Function(Box::new(Function::new(
8517 "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
8518 ))),
8519 to: DataType::Int { length: None, integer_spelling: false },
8520 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8521 })))
8522 }
8523 "WEEK" => {
8524 Ok(Expression::Cast(Box::new(Cast {
8525 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
8526 Expression::Function(Box::new(Function::new(
8527 "DATEDIFF".to_string(), vec![arg2, arg1],
8528 ))),
8529 Expression::number(7),
8530 ))),
8531 to: DataType::Int { length: None, integer_spelling: false },
8532 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8533 })))
8534 }
8535 _ => {
8536 Ok(Expression::Function(Box::new(Function::new(
8537 "DATEDIFF".to_string(), vec![arg2, arg1],
8538 ))))
8539 }
8540 }
8541 }
8542 DialectType::Spark | DialectType::Databricks => {
8543 let unit = Expression::Identifier(Identifier::new(&unit_str));
8544 Ok(Expression::Function(Box::new(Function::new(
8545 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
8546 ))))
8547 }
8548 _ => {
8549 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
8550 let d1 = if is_hive_spark { Self::ensure_cast_date(arg1) } else { arg1 };
8551 let d2 = if is_hive_spark { Self::ensure_cast_date(arg2) } else { arg2 };
8552 let unit = Expression::Identifier(Identifier::new(&unit_str));
8553 Ok(Expression::Function(Box::new(Function::new(
8554 "DATEDIFF".to_string(), vec![unit, d1, d2],
8555 ))))
8556 }
8557 }
8558 }
8559 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
8560 "DATEDIFF" if f.args.len() == 2 => {
8561 let mut args = f.args;
8562 let arg0 = args.remove(0);
8563 let arg1 = args.remove(0);
8564
8565 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
8566 // Also recognizes TryCast/Cast to DATE that may have been produced by
8567 // cross-dialect TO_DATE -> TRY_CAST conversion
// Unwrap TO_DATE(x) -> (x, true); otherwise return (e, false).
// The boolean flags "this expression is already date-typed", letting the
// caller decide between TRY_CAST and a forced CAST when rewriting args.
let unwrap_to_date = |e: Expression| -> (Expression, bool) {
    // Case 1: a literal single-argument TO_DATE(x) call — extract the inner arg.
    if let Expression::Function(ref f) = e {
        if f.name.eq_ignore_ascii_case("TO_DATE") && f.args.len() == 1 {
            return (f.args[0].clone(), true);
        }
    }
    // Also recognize TryCast(x, Date) as an already-converted TO_DATE
    // NOTE(review): the comment above this closure also mentions plain
    // Cast-to-DATE, but only TryCast is matched here — confirm whether
    // Expression::Cast(x, Date) should be recognized as well.
    if let Expression::TryCast(ref c) = e {
        if matches!(c.to, DataType::Date) {
            return (e, true); // Already properly cast, return as-is
        }
    }
    // Not a date-producing expression we recognize: hand it back unchanged.
    (e, false)
};
8582
8583 match target {
8584 DialectType::DuckDB => {
8585 // For Hive source, always CAST to DATE
8586 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
8587 let cast_d0 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
8588 let (inner, was_to_date) = unwrap_to_date(arg1);
8589 if was_to_date {
8590 // Already a date expression, use directly
8591 if matches!(&inner, Expression::TryCast(_)) {
8592 inner // Already TRY_CAST(x AS DATE)
8593 } else {
8594 Self::try_cast_date(inner)
8595 }
8596 } else {
8597 Self::force_cast_date(inner)
8598 }
8599 } else {
8600 Self::ensure_cast_date(arg1)
8601 };
8602 let cast_d1 = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
8603 let (inner, was_to_date) = unwrap_to_date(arg0);
8604 if was_to_date {
8605 if matches!(&inner, Expression::TryCast(_)) {
8606 inner
8607 } else {
8608 Self::try_cast_date(inner)
8609 }
8610 } else {
8611 Self::force_cast_date(inner)
8612 }
8613 } else {
8614 Self::ensure_cast_date(arg0)
8615 };
8616 Ok(Expression::Function(Box::new(Function::new(
8617 "DATE_DIFF".to_string(), vec![
8618 Expression::string("DAY"),
8619 cast_d0, cast_d1,
8620 ],
8621 ))))
8622 }
8623 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8624 // For Hive/Spark source, apply double_cast_timestamp_date
8625 // For other sources (MySQL etc.), just swap args without casting
8626 if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
8627 let cast_fn = |e: Expression| -> Expression {
8628 let (inner, was_to_date) = unwrap_to_date(e);
8629 if was_to_date {
8630 let first_cast = Self::double_cast_timestamp_date(inner);
8631 Self::double_cast_timestamp_date(first_cast)
8632 } else {
8633 Self::double_cast_timestamp_date(inner)
8634 }
8635 };
8636 Ok(Expression::Function(Box::new(Function::new(
8637 "DATE_DIFF".to_string(), vec![
8638 Expression::string("DAY"),
8639 cast_fn(arg1), cast_fn(arg0),
8640 ],
8641 ))))
8642 } else {
8643 Ok(Expression::Function(Box::new(Function::new(
8644 "DATE_DIFF".to_string(), vec![
8645 Expression::string("DAY"),
8646 arg1, arg0,
8647 ],
8648 ))))
8649 }
8650 }
8651 DialectType::Redshift => {
8652 let unit = Expression::Identifier(Identifier::new("DAY"));
8653 Ok(Expression::Function(Box::new(Function::new(
8654 "DATEDIFF".to_string(), vec![unit, arg1, arg0],
8655 ))))
8656 }
8657 _ => {
8658 Ok(Expression::Function(Box::new(Function::new(
8659 "DATEDIFF".to_string(), vec![arg0, arg1],
8660 ))))
8661 }
8662 }
8663 }
8664 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
8665 "DATE_DIFF" if f.args.len() == 3 => {
8666 let mut args = f.args;
8667 let arg0 = args.remove(0);
8668 let arg1 = args.remove(0);
8669 let arg2 = args.remove(0);
8670 let unit_str = Self::get_unit_str_static(&arg0);
8671
8672 match target {
8673 DialectType::DuckDB => {
8674 // DuckDB: DATE_DIFF('UNIT', start, end)
8675 Ok(Expression::Function(Box::new(Function::new(
8676 "DATE_DIFF".to_string(), vec![
8677 Expression::string(&unit_str),
8678 arg1, arg2,
8679 ],
8680 ))))
8681 }
8682 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8683 Ok(Expression::Function(Box::new(Function::new(
8684 "DATE_DIFF".to_string(), vec![
8685 Expression::string(&unit_str),
8686 arg1, arg2,
8687 ],
8688 ))))
8689 }
8690 DialectType::ClickHouse => {
8691 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
8692 let unit = Expression::Identifier(Identifier::new(&unit_str));
8693 Ok(Expression::Function(Box::new(Function::new(
8694 "DATE_DIFF".to_string(), vec![unit, arg1, arg2],
8695 ))))
8696 }
8697 DialectType::Snowflake | DialectType::Redshift => {
8698 let unit = Expression::Identifier(Identifier::new(&unit_str));
8699 Ok(Expression::Function(Box::new(Function::new(
8700 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
8701 ))))
8702 }
8703 _ => {
8704 let unit = Expression::Identifier(Identifier::new(&unit_str));
8705 Ok(Expression::Function(Box::new(Function::new(
8706 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
8707 ))))
8708 }
8709 }
8710 }
8711 // DATEADD(unit, val, date) - 3-arg form
8712 "DATEADD" if f.args.len() == 3 => {
8713 let mut args = f.args;
8714 let arg0 = args.remove(0);
8715 let arg1 = args.remove(0);
8716 let arg2 = args.remove(0);
8717 let unit_str = Self::get_unit_str_static(&arg0);
8718
8719 // Normalize TSQL unit abbreviations to standard names
8720 let unit_str = match unit_str.as_str() {
8721 "YY" | "YYYY" => "YEAR".to_string(),
8722 "QQ" | "Q" => "QUARTER".to_string(),
8723 "MM" | "M" => "MONTH".to_string(),
8724 "WK" | "WW" => "WEEK".to_string(),
8725 "DD" | "D" | "DY" => "DAY".to_string(),
8726 "HH" => "HOUR".to_string(),
8727 "MI" | "N" => "MINUTE".to_string(),
8728 "SS" | "S" => "SECOND".to_string(),
8729 "MS" => "MILLISECOND".to_string(),
8730 "MCS" | "US" => "MICROSECOND".to_string(),
8731 _ => unit_str,
8732 };
8733 match target {
8734 DialectType::Snowflake => {
8735 let unit = Expression::Identifier(Identifier::new(&unit_str));
8736 // Cast string literal to TIMESTAMP, but not for Snowflake source
8737 // (Snowflake natively accepts string literals in DATEADD)
8738 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
8739 && !matches!(source, DialectType::Snowflake) {
8740 Expression::Cast(Box::new(Cast {
8741 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8742 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8743 }))
8744 } else { arg2 };
8745 Ok(Expression::Function(Box::new(Function::new(
8746 "DATEADD".to_string(), vec![unit, arg1, arg2],
8747 ))))
8748 }
8749 DialectType::TSQL => {
8750 let unit = Expression::Identifier(Identifier::new(&unit_str));
8751 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
8752 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_)))
8753 && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
8754 Expression::Cast(Box::new(Cast {
8755 this: arg2, to: DataType::Custom { name: "DATETIME2".to_string() },
8756 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8757 }))
8758 } else { arg2 };
8759 Ok(Expression::Function(Box::new(Function::new(
8760 "DATEADD".to_string(), vec![unit, arg1, arg2],
8761 ))))
8762 }
8763 DialectType::Redshift => {
8764 let unit = Expression::Identifier(Identifier::new(&unit_str));
8765 Ok(Expression::Function(Box::new(Function::new(
8766 "DATEADD".to_string(), vec![unit, arg1, arg2],
8767 ))))
8768 }
8769 DialectType::Databricks => {
8770 let unit = Expression::Identifier(Identifier::new(&unit_str));
8771 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
8772 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
8773 let func_name = if matches!(source, DialectType::TSQL | DialectType::Fabric | DialectType::Databricks | DialectType::Snowflake) {
8774 "DATEADD"
8775 } else {
8776 "DATE_ADD"
8777 };
8778 Ok(Expression::Function(Box::new(Function::new(
8779 func_name.to_string(), vec![unit, arg1, arg2],
8780 ))))
8781 }
8782 DialectType::DuckDB => {
8783 // Special handling for NANOSECOND from Snowflake
8784 if unit_str == "NANOSECOND" && matches!(source, DialectType::Snowflake) {
8785 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
8786 let cast_ts = Expression::Cast(Box::new(Cast {
8787 this: arg2,
8788 to: DataType::Custom { name: "TIMESTAMP_NS".to_string() },
8789 trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
8790 }));
8791 let epoch_ns = Expression::Function(Box::new(Function::new(
8792 "EPOCH_NS".to_string(), vec![cast_ts],
8793 )));
8794 let sum = Expression::Add(Box::new(BinaryOp::new(epoch_ns, arg1)));
8795 Ok(Expression::Function(Box::new(Function::new(
8796 "MAKE_TIMESTAMP_NS".to_string(), vec![sum],
8797 ))))
8798 } else {
8799 // DuckDB: convert to date + INTERVAL syntax with CAST
8800 let iu = Self::parse_interval_unit_static(&unit_str);
8801 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
8802 this: Some(arg1),
8803 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
8804 }));
8805 // Cast string literal to TIMESTAMP
8806 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
8807 Expression::Cast(Box::new(Cast {
8808 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8809 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8810 }))
8811 } else { arg2 };
8812 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
8813 }
8814 }
8815 DialectType::Spark => {
8816 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
8817 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
8818 if matches!(source, DialectType::TSQL | DialectType::Fabric) {
8819 fn multiply_expr_spark(expr: Expression, factor: i64) -> Expression {
8820 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
8821 if let Ok(val) = n.parse::<i64>() {
8822 return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
8823 }
8824 }
8825 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
8826 expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
8827 )))
8828 }
8829 let normalized_unit = match unit_str.as_str() {
8830 "YEAR" | "YY" | "YYYY" => "YEAR",
8831 "QUARTER" | "QQ" | "Q" => "QUARTER",
8832 "MONTH" | "MM" | "M" => "MONTH",
8833 "WEEK" | "WK" | "WW" => "WEEK",
8834 "DAY" | "DD" | "D" | "DY" => "DAY",
8835 _ => &unit_str,
8836 };
8837 match normalized_unit {
8838 "YEAR" => {
8839 let months = multiply_expr_spark(arg1, 12);
8840 Ok(Expression::Function(Box::new(Function::new(
8841 "ADD_MONTHS".to_string(), vec![arg2, months],
8842 ))))
8843 }
8844 "QUARTER" => {
8845 let months = multiply_expr_spark(arg1, 3);
8846 Ok(Expression::Function(Box::new(Function::new(
8847 "ADD_MONTHS".to_string(), vec![arg2, months],
8848 ))))
8849 }
8850 "MONTH" => {
8851 Ok(Expression::Function(Box::new(Function::new(
8852 "ADD_MONTHS".to_string(), vec![arg2, arg1],
8853 ))))
8854 }
8855 "WEEK" => {
8856 let days = multiply_expr_spark(arg1, 7);
8857 Ok(Expression::Function(Box::new(Function::new(
8858 "DATE_ADD".to_string(), vec![arg2, days],
8859 ))))
8860 }
8861 "DAY" => {
8862 Ok(Expression::Function(Box::new(Function::new(
8863 "DATE_ADD".to_string(), vec![arg2, arg1],
8864 ))))
8865 }
8866 _ => {
8867 let unit = Expression::Identifier(Identifier::new(&unit_str));
8868 Ok(Expression::Function(Box::new(Function::new(
8869 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
8870 ))))
8871 }
8872 }
8873 } else {
8874 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
8875 let unit = Expression::Identifier(Identifier::new(&unit_str));
8876 Ok(Expression::Function(Box::new(Function::new(
8877 "DATE_ADD".to_string(), vec![unit, arg1, arg2],
8878 ))))
8879 }
8880 }
8881 DialectType::Hive => {
8882 match unit_str.as_str() {
8883 "MONTH" => {
8884 Ok(Expression::Function(Box::new(Function::new(
8885 "ADD_MONTHS".to_string(), vec![arg2, arg1],
8886 ))))
8887 }
8888 _ => {
8889 Ok(Expression::Function(Box::new(Function::new(
8890 "DATE_ADD".to_string(), vec![arg2, arg1],
8891 ))))
8892 }
8893 }
8894 }
8895 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8896 // Cast string literal date to TIMESTAMP
8897 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
8898 Expression::Cast(Box::new(Cast {
8899 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8900 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8901 }))
8902 } else { arg2 };
8903 Ok(Expression::Function(Box::new(Function::new(
8904 "DATE_ADD".to_string(), vec![
8905 Expression::string(&unit_str),
8906 arg1, arg2,
8907 ],
8908 ))))
8909 }
8910 DialectType::MySQL => {
8911 let iu = Self::parse_interval_unit_static(&unit_str);
8912 Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
8913 this: arg2,
8914 interval: arg1,
8915 unit: iu,
8916 })))
8917 }
8918 DialectType::PostgreSQL => {
8919 // Cast string literal date to TIMESTAMP
8920 let arg2 = if matches!(&arg2, Expression::Literal(Literal::String(_))) {
8921 Expression::Cast(Box::new(Cast {
8922 this: arg2, to: DataType::Timestamp { precision: None, timezone: false },
8923 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8924 }))
8925 } else { arg2 };
8926 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
8927 this: Some(Expression::string(&format!("{} {}", Self::expr_to_string_static(&arg1), unit_str))),
8928 unit: None,
8929 }));
8930 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
8931 }
8932 DialectType::BigQuery => {
8933 let iu = Self::parse_interval_unit_static(&unit_str);
8934 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
8935 this: Some(arg1),
8936 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
8937 }));
8938 // Non-TSQL sources: CAST string literal to DATETIME
8939 let arg2 = if !matches!(source, DialectType::TSQL | DialectType::Fabric)
8940 && matches!(&arg2, Expression::Literal(Literal::String(_)))
8941 {
8942 Expression::Cast(Box::new(Cast {
8943 this: arg2, to: DataType::Custom { name: "DATETIME".to_string() },
8944 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
8945 }))
8946 } else { arg2 };
8947 Ok(Expression::Function(Box::new(Function::new(
8948 "DATE_ADD".to_string(), vec![arg2, interval],
8949 ))))
8950 }
8951 _ => {
8952 let unit = Expression::Identifier(Identifier::new(&unit_str));
8953 Ok(Expression::Function(Box::new(Function::new(
8954 "DATEADD".to_string(), vec![unit, arg1, arg2],
8955 ))))
8956 }
8957 }
8958 }
// DATE_ADD(unit, val, date) - 3-arg from ClickHouse/Presto/Spark
// Rewrites the 3-argument form (arg0 = unit, arg1 = amount, arg2 = date/timestamp)
// into the target dialect's preferred date-arithmetic spelling.
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit (identifier or string literal)
    let arg1 = args.remove(0); // amount to add
    let arg2 = args.remove(0); // date/timestamp expression
    // Normalize the unit expression into a plain string key (e.g. "DAY").
    let unit_str = Self::get_unit_str_static(&arg0);

    match target {
        // Presto family spells the unit as a string literal: DATE_ADD('DAY', n, d).
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::string(&unit_str),
                    arg1, arg2,
                ],
            ))))
        }
        // DuckDB: rewrite to arithmetic `date + INTERVAL n UNIT`.
        DialectType::DuckDB => {
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(arg1),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))))
        }
        // These dialects use DATEADD(UNIT, n, d) with a bare identifier unit.
        DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift => {
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, arg1],
                ))))
            } else {
                let unit = Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                ))))
            }
        }
        // Databricks keeps the 3-arg DATE_ADD(UNIT, n, d) form.
        DialectType::Databricks => {
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            // NOTE(review): the unit is discarded here regardless of its value —
            // presumably only DAY reaches this path for Hive; confirm upstream.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![arg2, arg1],
            ))))
        }
        // Fallback: keep the 3-arg form with an identifier unit.
        _ => {
            let unit = Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark form (add days)
// Only matches when the SOURCE dialect is Hive/Spark/Databricks, where the
// 2-arg form implicitly means "add this many days".
"DATE_ADD" if f.args.len() == 2
    && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
    let mut args = f.args;
    let date = args.remove(0); // date/timestamp expression
    let days = args.remove(0); // day count to add
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days, date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
            let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
                Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
            } else { days };
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(interval_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Day,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            // NOTE(review): the arm guard already restricts source to Hive/Spark/Databricks,
            // so this outer matches! is always true here; kept as-is.
            let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                if matches!(date, Expression::Literal(Literal::String(_))) {
                    Self::double_cast_timestamp_date(date)
                } else { date }
            } else { date };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days, cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: plain DATEADD(DAY, days, date), no cast needed.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days, date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark) {
                if matches!(date, Expression::Literal(Literal::String(_))) {
                    Self::double_cast_datetime2_date(date)
                } else { date }
            } else { date };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days, cast_date,
                ],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                if matches!(date, Expression::Literal(Literal::String(_))) {
                    Self::double_cast_timestamp_date(date)
                } else { date }
            } else { date };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::string("DAY"),
                    days, cast_date,
                ],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
            // NOTE(review): unlike the Snowflake/TSQL branches above, this double-cast
            // is applied without a string-literal check — confirm that is intended.
            let cast_date = if matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) {
                Self::double_cast_datetime_date(date)
            } else { date };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(days, Expression::Mul(_) | Expression::Sub(_) | Expression::Add(_)) {
                Expression::Paren(Box::new(crate::expressions::Paren { this: days, trailing_comments: vec![] }))
            } else { days };
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(interval_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Day,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: dedicated DateAdd AST node (renders DATE_ADD(date, INTERVAL n DAY)).
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: date,
                interval: days,
                unit: iu,
            })))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'n DAY'. The day count is stringified
            // into the interval literal via expr_to_string_static.
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::string(&format!("{} DAY", Self::expr_to_string_static(&days)))),
                unit: None,
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
        }
        // Fallback: keep the 2-arg form unchanged.
        _ => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, days],
            ))))
        }
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
// Mirrors the DATE_ADD 2-arg arm: targets without a native DATE_SUB get
// DATEADD/DATE_ADD with the day count negated (days * -1).
"DATE_SUB" if f.args.len() == 2
    && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
    let mut args = f.args;
    let date = args.remove(0); // date/timestamp expression
    let days = args.remove(0); // day count to subtract
    // Helper to create days * -1
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Literal::Number("-1".to_string())),
        )))
    };
    // Computed once up front; several branches below cast string-literal dates.
    let is_string_literal = matches!(date, Expression::Literal(Literal::String(_)));
    match target {
        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(), vec![date, days],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY.
            // The negated count is parenthesized so the interval renders correctly.
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Day,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(DAY, days * -1, date), double-casting string literals.
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else { date };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg, cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days * -1, date), no cast.
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg, date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(DAY, days * -1, ...), using DATETIME2 for the
            // intermediate cast of string-literal dates.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else { date };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg, cast_date,
                ],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto family: DATE_ADD('DAY', days * -1, ...), string-literal unit.
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else { date };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::string("DAY"),
                    neg, cast_date,
                ],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY) with a DATETIME
            // intermediate cast for string-literal dates.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else { date };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Paren(Box::new(crate::expressions::Paren { this: neg, trailing_comments: vec![] }))),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Day,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![cast_date, interval],
            ))))
        }
        // Fallback: keep DATE_SUB(date, days) unchanged.
        _ => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(), vec![date, days],
            ))))
        }
    }
}
// ADD_MONTHS(date, val) -> target-specific
// Rewrites the Oracle/Hive-style ADD_MONTHS into each target's month-addition idiom.
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0); // date/timestamp expression
    let val = args.remove(0);  // number of months to add
    match target {
        DialectType::TSQL => {
            // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2)).
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val, cast_date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val MONTH.
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Month,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake
            // (round-trips unchanged); otherwise normalize to DATEADD(MONTH, ...).
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![date, val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(), vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        val, date,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(MONTH, val, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val, date,
                ],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto family: DATE_ADD('MONTH', val, date) with string-literal unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::string("MONTH"),
                    val, date,
                ],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val MONTH).
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Month,
                    use_plural: false,
                }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, interval],
            ))))
        }
        // Fallback: keep ADD_MONTHS unchanged.
        _ => {
            Ok(Expression::Function(Box::new(Function::new(
                "ADD_MONTHS".to_string(), vec![date, val],
            ))))
        }
    }
}
// DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
"DATETRUNC" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // date/timestamp expression
    let unit_str = Self::get_unit_str_static(&arg0);
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATETRUNC".to_string(), vec![
                    Expression::Identifier(Identifier::new(&unit_str)),
                    arg1,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
            let date = Self::ensure_cast_timestamp(arg1);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(), vec![
                    Expression::string(&unit_str),
                    date,
                ],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: dateTrunc('UNIT', expr)
            // (camelCase name is ClickHouse's canonical spelling)
            Ok(Expression::Function(Box::new(Function::new(
                "dateTrunc".to_string(), vec![
                    Expression::string(&unit_str),
                    arg1,
                ],
            ))))
        }
        _ => {
            // Standard: DATE_TRUNC('UNIT', expr)
            let unit = Expression::string(&unit_str);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(), vec![unit, arg1],
            ))))
        }
    }
}
// GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
"GETDATE" if f.args.is_empty() => {
    match target {
        // TSQL keeps the original call untouched.
        // NOTE(review): Fabric is paired with TSQL elsewhere in this match but
        // falls through to CURRENT_TIMESTAMP here — confirm that is intended.
        DialectType::TSQL => Ok(Expression::Function(f)),
        // Redshift supports GETDATE(); rebuilt via Function::new rather than
        // reusing `f` — presumably to reset any parse-time flags; verify.
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new("GETDATE".to_string(), vec![])))),
        _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: false,
        })),
    }
}
// TO_HEX(x) / HEX(x) -> target-specific hex function
"TO_HEX" | "HEX" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino => "TO_HEX",
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => "HEX",
        DialectType::DuckDB | DialectType::PostgreSQL | DialectType::Redshift => "TO_HEX",
        // Unknown target: keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                // Same BYTES-vs-hex mismatch applies to the SHA family.
                Expression::Function(inner_f) if inner_f.name.to_uppercase() == "MD5"
                    || inner_f.name.to_uppercase() == "SHA1"
                    || inner_f.name.to_uppercase() == "SHA256"
                    || inner_f.name.to_uppercase() == "SHA512" => {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(), vec![arg.clone()],
                    )))
                }
                // Any other argument passes through unchanged (moved out of f.args).
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new("FROM_HEX".to_string(), vec![wrapped_arg]))))
        }
        _ => {
            // Simple rename for the remaining targets; unknown targets keep
            // whichever spelling the source used.
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
        }
    }
}
// TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
"TO_UTF8" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Append the charset argument and rename to ENCODE.
            let mut args = f.args;
            args.push(Expression::string("utf-8"));
            Ok(Expression::Function(Box::new(Function::new("ENCODE".to_string(), args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
"FROM_UTF8" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Append the charset argument and rename to DECODE.
            let mut args = f.args;
            args.push(Expression::string("utf-8"));
            Ok(Expression::Function(Box::new(Function::new("DECODE".to_string(), args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
"STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "STARTSWITH",
        DialectType::Presto | DialectType::Trino => "STARTS_WITH",
        DialectType::PostgreSQL | DialectType::Redshift => "STARTS_WITH",
        // Unknown target: keep the source spelling.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
"APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_DISTINCT",
        _ => "APPROX_COUNT_DISTINCT",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
// (skipped for BigQuery sources, whose JSON_EXTRACT has different path semantics)
"JSON_EXTRACT" if f.args.len() == 2
    && !matches!(source, DialectType::BigQuery)
    && matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
    Ok(Expression::Function(Box::new(Function::new("GET_JSON_OBJECT".to_string(), f.args))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
"JSON_EXTRACT" if f.args.len() == 2
    && matches!(target, DialectType::SQLite) => {
    let mut args = f.args;
    // Remove path first (index 1) so `this` is still at index 0.
    let path = args.remove(1);
    let this = args.remove(0);
    // Build a dedicated JsonExtract node configured for `->` rendering.
    Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
        this,
        path,
        returning: None,
        arrow_syntax: true,
        hash_arrow_syntax: false,
        wrapper_option: None,
        quotes_option: None,
        on_scalar_string: false,
        on_error: None,
    })))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            if matches!(source, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
                if let Some(Expression::ParseJson(pj)) = f.args.first() {
                    if let Expression::Literal(Literal::String(s)) = &pj.this {
                        // Wrap the JSON text in [...] so SCHEMA_OF_JSON/FROM_JSON
                        // can parse scalars too; REGEXP_EXTRACT strips the added
                        // brackets back off the rendered output.
                        let wrapped = Expression::Literal(Literal::String(format!("[{}]", s)));
                        let schema_of_json = Expression::Function(Box::new(Function::new(
                            "SCHEMA_OF_JSON".to_string(),
                            vec![wrapped.clone()],
                        )));
                        let from_json = Expression::Function(Box::new(Function::new(
                            "FROM_JSON".to_string(),
                            vec![wrapped, schema_of_json],
                        )));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![from_json],
                        )));
                        return Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT".to_string(),
                            vec![
                                to_json,
                                Expression::Literal(Literal::String("^.(.*).$".to_string())),
                                Expression::Literal(Literal::Number("1".to_string())),
                            ],
                        ))));
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) = args.first() {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON") && inner_f.args.len() == 1 {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
        }
        DialectType::BigQuery => {
            // BigQuery renders JSON to text via TO_JSON_STRING.
            Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), f.args))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), f.args)));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle/Redshift keep bare SYSDATE as-is.
        DialectType::Oracle | DialectType::Redshift => Ok(Expression::Function(f)),
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                }),
                zone: Expression::Literal(Literal::String("UTC".to_string())),
            })))
        }
        // Others: CURRENT_TIMESTAMP node flagged with sysdate=true so the
        // generator can remember its origin.
        _ => Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: true,
        })),
    }
}
// LOGICAL_OR(x) -> BOOL_OR(x)
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// LOGICAL_AND(x) -> BOOL_AND(x)
"LOGICAL_AND" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_AND",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
9600 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
9601 "MONTHS_ADD" if f.args.len() == 2 => {
9602 match target {
9603 DialectType::Oracle => {
9604 Ok(Expression::Function(Box::new(Function::new("ADD_MONTHS".to_string(), f.args))))
9605 }
9606 _ => Ok(Expression::Function(f)),
9607 }
9608 }
9609 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
9610 "ARRAY_JOIN" if f.args.len() >= 2 => {
9611 match target {
9612 DialectType::Spark | DialectType::Databricks => {
9613 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
9614 Ok(Expression::Function(f))
9615 }
9616 DialectType::Hive => {
9617 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
9618 let mut args = f.args;
9619 let arr = args.remove(0);
9620 let sep = args.remove(0);
9621 // Drop any remaining args (null_replacement)
9622 Ok(Expression::Function(Box::new(Function::new("CONCAT_WS".to_string(), vec![sep, arr]))))
9623 }
9624 DialectType::Presto | DialectType::Trino => {
9625 Ok(Expression::Function(f))
9626 }
9627 _ => Ok(Expression::Function(f)),
9628 }
9629 }
9630 // LOCATE(substr, str, pos) 3-arg -> target-specific
9631 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
9632 "LOCATE" if f.args.len() == 3 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::DuckDB) => {
9633 let mut args = f.args;
9634 let substr = args.remove(0);
9635 let string = args.remove(0);
9636 let pos = args.remove(0);
9637 // STRPOS(SUBSTRING(string, pos), substr)
9638 let substring_call = Expression::Function(Box::new(Function::new(
9639 "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
9640 )));
9641 let strpos_call = Expression::Function(Box::new(Function::new(
9642 "STRPOS".to_string(), vec![substring_call, substr.clone()],
9643 )));
9644 // STRPOS(...) + pos - 1
9645 let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
9646 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
9647 strpos_call.clone(),
9648 pos.clone(),
9649 ))),
9650 Expression::number(1),
9651 )));
9652 // STRPOS(...) = 0
9653 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
9654 strpos_call.clone(),
9655 Expression::number(0),
9656 )));
9657
9658 match target {
9659 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9660 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
9661 Ok(Expression::Function(Box::new(Function::new(
9662 "IF".to_string(),
9663 vec![is_zero, Expression::number(0), pos_adjusted],
9664 ))))
9665 }
9666 DialectType::DuckDB => {
9667 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
9668 Ok(Expression::Case(Box::new(crate::expressions::Case {
9669 operand: None,
9670 whens: vec![
9671 (is_zero, Expression::number(0)),
9672 ],
9673 else_: Some(pos_adjusted),
9674 })))
9675 }
9676 _ => Ok(Expression::Function(Box::new(Function::new(
9677 "LOCATE".to_string(), vec![substr, string, pos],
9678 )))),
9679 }
9680 }
9681 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
9682 "STRPOS" if f.args.len() == 3
9683 && matches!(target, DialectType::BigQuery | DialectType::Oracle | DialectType::Teradata) => {
9684 let mut args = f.args;
9685 let haystack = args.remove(0);
9686 let needle = args.remove(0);
9687 let occurrence = args.remove(0);
9688 Ok(Expression::Function(Box::new(Function::new(
9689 "INSTR".to_string(),
9690 vec![haystack, needle, Expression::number(1), occurrence],
9691 ))))
9692 }
            // SCHEMA_NAME([id]) (T-SQL): map to each target's current-schema construct.
            // Only the 0/1-arg form is handled here; any id argument is dropped.
            "SCHEMA_NAME" if f.args.len() <= 1 => {
                match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // MySQL's SCHEMA() returns the current database name.
                        Ok(Expression::Function(Box::new(Function::new("SCHEMA".to_string(), vec![]))))
                    }
                    DialectType::PostgreSQL => {
                        // CURRENT_SCHEMA keyword expression (rendered without parentheses).
                        Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
                    }
                    DialectType::SQLite => {
                        // SQLite's primary schema is always named 'main'.
                        Ok(Expression::string("main"))
                    }
                    // NOTE(review): an unguarded "SCHEMA_NAME" arm appears later in this
                    // match with the same mappings; it is only reachable for >1 args.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // STRTOL(str, base): Trino/Presto spell string-to-integer-in-base conversion
            // as FROM_BASE(str, base).
            "STRTOL" if f.args.len() == 2 => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        Ok(Expression::Function(Box::new(Function::new("FROM_BASE".to_string(), f.args))))
                    }
                    // Other targets keep STRTOL untouched.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // EDITDIST3(a, b) (SQLite spellfix extension): Levenshtein edit distance;
            // Spark/Databricks spell it LEVENSHTEIN.
            "EDITDIST3" if f.args.len() == 2 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(Box::new(Function::new("LEVENSHTEIN".to_string(), f.args))))
                    }
                    // Other targets keep EDITDIST3 untouched.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // FORMAT(num, decimals) (MySQL thousands-separator formatting) -> DuckDB
            // FORMAT('{:,.Xf}', num) using a fmt-style template string.
            "FORMAT" if f.args.len() == 2
                && matches!(source, DialectType::MySQL | DialectType::SingleStore)
                && matches!(target, DialectType::DuckDB) => {
                let mut args = f.args;
                let num_expr = args.remove(0);
                let decimals_expr = args.remove(0);
                // Bake the decimal count into the template. Only a numeric literal can
                // be folded; anything else falls back to 0 decimals.
                // NOTE(review): a non-literal `decimals` argument silently loses its
                // value here — confirm that fallback is intended.
                let dec_count = match &decimals_expr {
                    Expression::Literal(Literal::Number(n)) => n.clone(),
                    _ => "0".to_string(),
                };
                // '{:,.Nf}': thousands separators with N fractional digits.
                let fmt_str = format!("{{:,.{}f}}", dec_count);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![Expression::string(&fmt_str), num_expr],
                ))))
            }
            // FORMAT(x, fmt) (T-SQL, .NET format strings) -> DATE_FORMAT/FORMAT_NUMBER
            // for Spark; otherwise expand single-char date shortcodes and keep FORMAT.
            "FORMAT" if f.args.len() == 2 && matches!(source, DialectType::TSQL | DialectType::Fabric) => {
                let val_expr = f.args[0].clone();
                let fmt_expr = f.args[1].clone();
                // Expand unambiguous .NET single-char date format shortcodes to full patterns.
                // Only expand shortcodes that are NOT also valid numeric format specifiers.
                // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
                // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
                let (expanded_fmt, is_shortcode) = match &fmt_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        match s.as_str() {
                            "m" | "M" => (Expression::string("MMMM d"), true),
                            "t" => (Expression::string("h:mm tt"), true),
                            "T" => (Expression::string("h:mm:ss tt"), true),
                            "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                            _ => (fmt_expr.clone(), false),
                        }
                    }
                    _ => (fmt_expr.clone(), false),
                };
                // Heuristic: a shortcode, or a literal containing typical date pattern
                // fragments (yyyy, MM, dd, HH, ...), is treated as a date format.
                let is_date_format = is_shortcode || match &expanded_fmt {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                        s.contains("yyyy") || s.contains("YYYY") || s.contains("MM")
                            || s.contains("dd") || s.contains("MMMM") || s.contains("HH")
                            || s.contains("hh") || s.contains("ss")
                    }
                    _ => false,
                };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Dates go through DATE_FORMAT, numbers through FORMAT_NUMBER.
                        let func_name = if is_date_format {
                            "DATE_FORMAT"
                        } else {
                            "FORMAT_NUMBER"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(), vec![val_expr, expanded_fmt],
                        ))))
                    }
                    _ => {
                        // For TSQL and other targets, expand shortcodes but keep FORMAT.
                        if is_shortcode {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT".to_string(), vec![val_expr, expanded_fmt],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                }
            }
            // FORMAT(fmt, args...) (Trino/Presto printf-style) -> target-specific.
            "FORMAT" if f.args.len() >= 2
                && matches!(source, DialectType::Trino | DialectType::Presto | DialectType::Athena) => {
                let fmt_expr = f.args[0].clone();
                let value_args: Vec<Expression> = f.args[1..].to_vec();
                match target {
                    // DuckDB uses fmt-style '{}' placeholders: rewrite %s when the
                    // template is a string literal; dynamic templates pass through.
                    DialectType::DuckDB => {
                        let new_fmt = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => {
                                Expression::Literal(Literal::String(s.replace("%s", "{}")))
                            }
                            _ => fmt_expr,
                        };
                        let mut args = vec![new_fmt];
                        args.extend(value_args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(), args,
                        ))))
                    }
                    // Snowflake: the common FORMAT('%s', x) idiom is simply TO_CHAR(x);
                    // any more complex template is left untouched.
                    DialectType::Snowflake => {
                        match &fmt_expr {
                            Expression::Literal(Literal::String(s)) if s == "%s" && value_args.len() == 1 => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TO_CHAR".to_string(), value_args,
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    // Default: keep FORMAT as-is.
                    _ => Ok(Expression::Function(f)),
                }
            }
9832 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
9833 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" if f.args.len() == 2 => {
9834 match target {
9835 DialectType::PostgreSQL | DialectType::Redshift => {
9836 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
9837 let arr = f.args[0].clone();
9838 let needle = f.args[1].clone();
9839 // Convert [] to ARRAY[] for PostgreSQL
9840 let pg_arr = match arr {
9841 Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
9842 expressions: a.expressions,
9843 bracket_notation: false,
9844 use_list_keyword: false,
9845 })),
9846 _ => arr,
9847 };
9848 // needle = ANY(arr) using the Any quantified expression
9849 let any_expr = Expression::Any(Box::new(crate::expressions::QuantifiedExpr {
9850 this: needle.clone(),
9851 subquery: pg_arr,
9852 op: Some(crate::expressions::QuantifiedOp::Eq),
9853 }));
9854 let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
9855 expressions: vec![any_expr, Expression::Boolean(crate::expressions::BooleanLiteral { value: false })],
9856 original_name: None,
9857 }));
9858 let is_null_check = Expression::IsNull(Box::new(crate::expressions::IsNull {
9859 this: needle,
9860 not: false,
9861 postfix_form: false,
9862 }));
9863 Ok(Expression::Case(Box::new(Case {
9864 operand: None,
9865 whens: vec![(is_null_check, Expression::Null(crate::expressions::Null))],
9866 else_: Some(coalesce),
9867 })))
9868 }
9869 _ => {
9870 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
9871 }
9872 }
9873 }
            // LIST_HAS_ANY / ARRAY_HAS_ANY(arr1, arr2): true when the arrays share at
            // least one element; lowered to the && overlap operator where available.
            "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // arr1 && arr2, rewriting bracket literals to ARRAY[...] syntax.
                        let mut args = f.args;
                        let arr1 = args.remove(0);
                        let arr2 = args.remove(0);
                        let pg_arr1 = match arr1 {
                            Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                expressions: a.expressions,
                                bracket_notation: false,
                                use_list_keyword: false,
                            })),
                            _ => arr1,
                        };
                        let pg_arr2 = match arr2 {
                            Expression::Array(a) => Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                expressions: a.expressions,
                                bracket_notation: false,
                                use_list_keyword: false,
                            })),
                            _ => arr2,
                        };
                        Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(pg_arr1, pg_arr2))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB supports && natively; no literal rewriting needed.
                        let mut args = f.args;
                        let arr1 = args.remove(0);
                        let arr2 = args.remove(0);
                        Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(arr1, arr2))))
                    }
                    // Default: normalize the function name to LIST_HAS_ANY.
                    _ => Ok(Expression::Function(Box::new(Function::new("LIST_HAS_ANY".to_string(), f.args)))),
                }
            }
9910 // APPROX_QUANTILE(x, q) -> target-specific
9911 "APPROX_QUANTILE" if f.args.len() == 2 => {
9912 match target {
9913 DialectType::Snowflake => {
9914 Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), f.args))))
9915 }
9916 DialectType::DuckDB => {
9917 Ok(Expression::Function(f))
9918 }
9919 _ => Ok(Expression::Function(f)),
9920 }
9921 }
            // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery; otherwise untouched.
            "MAKE_DATE" if f.args.len() == 3 => {
                match target {
                    DialectType::BigQuery => {
                        Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
9931 // RANGE(start, end[, step]) -> target-specific
9932 "RANGE" if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) => {
9933 let start = f.args[0].clone();
9934 let end = f.args[1].clone();
9935 let step = f.args.get(2).cloned();
9936 match target {
9937 DialectType::Spark | DialectType::Databricks => {
9938 // RANGE(start, end) -> SEQUENCE(start, end-1)
9939 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
9940 // RANGE(start, start) -> ARRAY() (empty)
9941 // RANGE(start, end, 0) -> ARRAY() (empty)
9942 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
9943
9944 // Check for constant args
9945 fn extract_i64(e: &Expression) -> Option<i64> {
9946 match e {
9947 Expression::Literal(Literal::Number(n)) => n.parse::<i64>().ok(),
9948 Expression::Neg(u) => {
9949 if let Expression::Literal(Literal::Number(n)) = &u.this {
9950 n.parse::<i64>().ok().map(|v| -v)
9951 } else { None }
9952 }
9953 _ => None,
9954 }
9955 }
9956 let start_val = extract_i64(&start);
9957 let end_val = extract_i64(&end);
9958 let step_val = step.as_ref().and_then(|s| extract_i64(s));
9959
9960 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
9961 if step_val == Some(0) {
9962 return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
9963 }
9964 if let (Some(s), Some(e_val)) = (start_val, end_val) {
9965 if s == e_val {
9966 return Ok(Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![]))));
9967 }
9968 }
9969
9970 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
9971 // All constants - compute new end = end - step (if step provided) or end - 1
9972 match step_val {
9973 Some(st) if st < 0 => {
9974 // Negative step: SEQUENCE(start, end - step, step)
9975 let new_end = e_val - st; // end - step (= end + |step|)
9976 let mut args = vec![start, Expression::number(new_end)];
9977 if let Some(s) = step { args.push(s); }
9978 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
9979 }
9980 Some(st) => {
9981 let new_end = e_val - st;
9982 let mut args = vec![start, Expression::number(new_end)];
9983 if let Some(s) = step { args.push(s); }
9984 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), args))))
9985 }
9986 None => {
9987 // No step: SEQUENCE(start, end - 1)
9988 let new_end = e_val - 1;
9989 Ok(Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), vec![start, Expression::number(new_end)]))))
9990 }
9991 }
9992 } else {
9993 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
9994 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))));
9995 let cond = Expression::Lte(Box::new(BinaryOp::new(
9996 Expression::Paren(Box::new(Paren { this: end_m1.clone(), trailing_comments: Vec::new() })),
9997 start.clone(),
9998 )));
9999 let empty = Expression::Function(Box::new(Function::new("ARRAY".to_string(), vec![])));
10000 let mut seq_args = vec![start, Expression::Paren(Box::new(Paren { this: end_m1, trailing_comments: Vec::new() }))];
10001 if let Some(s) = step { seq_args.push(s); }
10002 let seq = Expression::Function(Box::new(Function::new("SEQUENCE".to_string(), seq_args)));
10003 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10004 condition: cond,
10005 true_value: empty,
10006 false_value: Some(seq),
10007 original_name: None,
10008 })))
10009 }
10010 }
10011 DialectType::SQLite => {
10012 // RANGE(start, end) -> GENERATE_SERIES(start, end)
10013 // The subquery wrapping is handled at the Alias level
10014 let mut args = vec![start, end];
10015 if let Some(s) = step { args.push(s); }
10016 Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), args))))
10017 }
10018 _ => Ok(Expression::Function(f)),
10019 }
10020 }
            // (ARRAY_REVERSE_SORT is handled earlier, including DuckDB self-normalization.)
            // MAP_FROM_ARRAYS(keys, values): map construction from two parallel arrays.
            "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
                match target {
                    DialectType::Snowflake => {
                        // NOTE(review): OBJECT_CONSTRUCT takes alternating key/value
                        // arguments rather than two arrays — confirm this is intended or
                        // that a later pass reshapes the argument list.
                        Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), f.args))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Native in Spark; re-emit with canonical casing.
                        Ok(Expression::Function(Box::new(Function::new("MAP_FROM_ARRAYS".to_string(), f.args))))
                    }
                    _ => {
                        // Default: MAP(keys, values), the DuckDB-style two-array constructor.
                        Ok(Expression::Function(Box::new(Function::new("MAP".to_string(), f.args))))
                    }
                }
            }
            // VARIANCE(x) -> varSamp(x): ClickHouse's sample-variance aggregate is
            // camelCased and name-sensitive.
            "VARIANCE" if f.args.len() == 1 => {
                match target {
                    DialectType::ClickHouse => {
                        Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // STDDEV(x) -> stddevSamp(x): ClickHouse's sample standard deviation.
            "STDDEV" if f.args.len() == 1 => {
                match target {
                    DialectType::ClickHouse => {
                        Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ISINF(x) -> IS_INF(x): BigQuery uses the underscored name.
            "ISINF" if f.args.len() == 1 => {
                match target {
                    DialectType::BigQuery => {
                        Ok(Expression::Function(Box::new(Function::new("IS_INF".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // CONTAINS(arr, x) (Presto-style membership) -> ARRAY_CONTAINS(arr, x) for
            // Spark/Databricks/Hive; otherwise untouched.
            "CONTAINS" if f.args.len() == 2 => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for the Presto family.
            // NOTE(review): this arm is shadowed by the earlier
            // "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" arm, which carries the
            // same `f.args.len() == 2` guard and therefore always matches first —
            // as written, this arm is unreachable.
            "ARRAY_CONTAINS" if f.args.len() == 2 => {
                match target {
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), f.args))))
                    }
                    DialectType::DuckDB => {
                        // Re-emit with canonical casing.
                        Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // TO_UNIXTIME(x) (Presto) -> UNIX_TIMESTAMP(x) for Hive/Spark/Databricks.
            "TO_UNIXTIME" if f.args.len() == 1 => {
                match target {
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(Box::new(Function::new("UNIX_TIMESTAMP".to_string(), f.args))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
10094 // FROM_UNIXTIME(x) -> target-specific
10095 "FROM_UNIXTIME" if f.args.len() == 1 => {
10096 match target {
10097 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10098 | DialectType::Presto | DialectType::Trino => {
10099 Ok(Expression::Function(f))
10100 }
10101 DialectType::DuckDB => {
10102 // DuckDB: TO_TIMESTAMP(x)
10103 let arg = f.args.into_iter().next().unwrap();
10104 Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
10105 }
10106 DialectType::PostgreSQL => {
10107 // PG: TO_TIMESTAMP(col)
10108 let arg = f.args.into_iter().next().unwrap();
10109 Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![arg]))))
10110 }
10111 DialectType::Redshift => {
10112 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
10113 let arg = f.args.into_iter().next().unwrap();
10114 let epoch_ts = Expression::Literal(Literal::Timestamp("epoch".to_string()));
10115 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
10116 this: Some(Expression::string("1 SECOND")),
10117 unit: None,
10118 }));
10119 let mul = Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
10120 let add = Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
10121 Ok(Expression::Paren(Box::new(crate::expressions::Paren { this: add, trailing_comments: Vec::new() })))
10122 }
10123 _ => Ok(Expression::Function(f)),
10124 }
10125 }
            // FROM_UNIXTIME(x, fmt) (Hive/Spark, Java-style format string): convert the
            // epoch then format it with the target's own formatting function.
            "FROM_UNIXTIME" if f.args.len() == 2
                && matches!(source, DialectType::Hive | DialectType::Spark | DialectType::Databricks) => {
                let mut args = f.args;
                let unix_ts = args.remove(0);
                let fmt_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt), translating the Java
                        // format to C codes when it is a string literal.
                        let to_ts = Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![unix_ts])));
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                            let c_fmt = Self::hive_format_to_c_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(), vec![to_ts, Expression::string(&c_fmt)],
                            ))))
                        } else {
                            // Dynamic format: pass through untranslated.
                            Ok(Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(), vec![to_ts, fmt_expr],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt).
                        let from_unix = Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![unix_ts])));
                        if let Expression::Literal(crate::expressions::Literal::String(s)) = &fmt_expr {
                            let p_fmt = Self::hive_format_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(), vec![from_unix, Expression::string(&p_fmt)],
                            ))))
                        } else {
                            // Dynamic format: pass through untranslated.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(), vec![from_unix, fmt_expr],
                            ))))
                        }
                    }
                    _ => {
                        // Keep as FROM_UNIXTIME(x, fmt) for other targets.
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(), vec![unix_ts, fmt_expr],
                        ))))
                    }
                }
            }
10169 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
10170 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
10171 let unit_str = Self::get_unit_str_static(&f.args[0]);
10172 // Get the raw unit text preserving original case
10173 let raw_unit = match &f.args[0] {
10174 Expression::Identifier(id) => id.name.clone(),
10175 Expression::Literal(crate::expressions::Literal::String(s)) => s.clone(),
10176 Expression::Column(col) => col.name.name.clone(),
10177 _ => unit_str.clone(),
10178 };
10179 match target {
10180 DialectType::TSQL | DialectType::Fabric => {
10181 // Preserve original case of unit for TSQL
10182 let unit_name = match unit_str.as_str() {
10183 "YY" | "YYYY" => "YEAR".to_string(),
10184 "QQ" | "Q" => "QUARTER".to_string(),
10185 "MM" | "M" => "MONTH".to_string(),
10186 "WK" | "WW" => "WEEK".to_string(),
10187 "DD" | "D" | "DY" => "DAY".to_string(),
10188 "HH" => "HOUR".to_string(),
10189 "MI" | "N" => "MINUTE".to_string(),
10190 "SS" | "S" => "SECOND".to_string(),
10191 _ => raw_unit.clone(), // preserve original case
10192 };
10193 let mut args = f.args;
10194 args[0] = Expression::Identifier(Identifier::new(&unit_name));
10195 Ok(Expression::Function(Box::new(Function::new("DATEPART".to_string(), args))))
10196 }
10197 DialectType::Spark | DialectType::Databricks => {
10198 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
10199 // Preserve original case for non-abbreviation units
10200 let unit = match unit_str.as_str() {
10201 "YY" | "YYYY" => "YEAR".to_string(),
10202 "QQ" | "Q" => "QUARTER".to_string(),
10203 "MM" | "M" => "MONTH".to_string(),
10204 "WK" | "WW" => "WEEK".to_string(),
10205 "DD" | "D" | "DY" => "DAY".to_string(),
10206 "HH" => "HOUR".to_string(),
10207 "MI" | "N" => "MINUTE".to_string(),
10208 "SS" | "S" => "SECOND".to_string(),
10209 _ => raw_unit, // preserve original case
10210 };
10211 Ok(Expression::Extract(Box::new(crate::expressions::ExtractFunc {
10212 this: f.args[1].clone(),
10213 field: crate::expressions::DateTimeField::Custom(unit),
10214 })))
10215 }
10216 _ => {
10217 Ok(Expression::Function(Box::new(Function::new("DATE_PART".to_string(), f.args))))
10218 }
10219 }
10220 }
            // DATENAME(unit, date) (T-SQL): textual name of a date part.
            //   month   -> FORMAT(CAST(date AS DATETIME2), 'MMMM')      for TSQL
            //           -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
            //   weekday -> FORMAT(CAST(date AS DATETIME2), 'dddd')      for TSQL
            //           -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
            "DATENAME" if f.args.len() == 2 => {
                let unit_str = Self::get_unit_str_static(&f.args[0]);
                let date_expr = f.args[1].clone();
                match unit_str.as_str() {
                    "MM" | "M" | "MONTH" => {
                        match target {
                            DialectType::TSQL => {
                                // FORMAT wants a DATETIME2-compatible operand.
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Custom { name: "DATETIME2".to_string() },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Timestamp { timezone: false, precision: None },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("MMMM")],
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    "DW" | "WEEKDAY" => {
                        match target {
                            DialectType::TSQL => {
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Custom { name: "DATETIME2".to_string() },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FORMAT".to_string(), vec![cast_date, Expression::string("dddd")],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // 'EEEE' is the Spark day-of-week-name pattern.
                                let cast_date = Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: date_expr,
                                    to: DataType::Timestamp { timezone: false, precision: None },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(), vec![cast_date, Expression::string("EEEE")],
                                ))))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    }
                    // Other units (year, hour, ...) are left untouched.
                    _ => Ok(Expression::Function(f)),
                }
            }
10294 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
10295 "STRING_AGG" if f.args.len() >= 2 => {
10296 let x = f.args[0].clone();
10297 let sep = f.args[1].clone();
10298 match target {
10299 DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
10300 Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
10301 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
10302 })))
10303 }
10304 DialectType::SQLite => {
10305 Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
10306 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None,
10307 })))
10308 }
10309 DialectType::PostgreSQL | DialectType::Redshift => {
10310 Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
10311 this: x, separator: Some(sep), order_by: None, distinct: false, filter: None, limit: None,
10312 })))
10313 }
10314 _ => Ok(Expression::Function(f)),
10315 }
10316 }
            // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL; the struct-update syntax keeps
            // every other Function field (args, modifiers) intact.
            "JSON_ARRAYAGG" => {
                match target {
                    DialectType::PostgreSQL => {
                        Ok(Expression::Function(Box::new(Function { name: "JSON_AGG".to_string(), ..(*f) })))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SCHEMA_NAME fallback: CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite.
            // NOTE(review): the earlier guarded arm already handles f.args.len() <= 1,
            // so this arm is only reachable for the unusual >1-arg call and duplicates
            // the same mappings.
            "SCHEMA_NAME" => {
                match target {
                    DialectType::PostgreSQL => {
                        Ok(Expression::CurrentSchema(Box::new(crate::expressions::CurrentSchema { this: None })))
                    }
                    DialectType::SQLite => {
                        Ok(Expression::string("main"))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // TO_TIMESTAMP(x, fmt) (Spark/Hive, Java format) -> DuckDB STRPTIME with a
            // C strptime format string.
            "TO_TIMESTAMP" if f.args.len() == 2
                && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
                && matches!(target, DialectType::DuckDB) => {
                let mut args = f.args;
                let val = args.remove(0);
                let fmt_expr = args.remove(0);
                if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
                    // Convert Java/Spark format codes to C strptime codes. Multi-char
                    // tokens are replaced longest-first (so "yyyy" wins over "yy"); the
                    // character pass afterwards handles single-char zone codes while
                    // copying already-produced '%X' escapes verbatim.
                    fn java_to_c_fmt(fmt: &str) -> String {
                        let result = fmt
                            .replace("yyyy", "%Y")
                            .replace("SSSSSS", "%f")
                            .replace("EEEE", "%W")
                            .replace("MM", "%m")
                            .replace("dd", "%d")
                            .replace("HH", "%H")
                            .replace("mm", "%M")
                            .replace("ss", "%S")
                            .replace("yy", "%y");
                        let mut out = String::new();
                        let chars: Vec<char> = result.chars().collect();
                        let mut i = 0;
                        while i < chars.len() {
                            if chars[i] == '%' && i + 1 < chars.len() {
                                // Existing escape: copy both characters untouched.
                                out.push(chars[i]);
                                out.push(chars[i + 1]);
                                i += 2;
                            } else if chars[i] == 'z' {
                                // Java 'z' (zone name) -> '%Z'.
                                out.push_str("%Z");
                                i += 1;
                            } else if chars[i] == 'Z' {
                                // Java 'Z' (zone offset) -> '%z'.
                                out.push_str("%z");
                                i += 1;
                            } else {
                                out.push(chars[i]);
                                i += 1;
                            }
                        }
                        out
                    }
                    let c_fmt = java_to_c_fmt(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, Expression::string(&c_fmt)],
                    ))))
                } else {
                    // Dynamic format expression: pass through untranslated.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, fmt_expr],
                    ))))
                }
            }
            // TO_DATE(x) 1-arg from Doris/StarRocks: plain (strict) date conversion,
            // unlike the NULL-on-failure Spark variant handled below.
            "TO_DATE" if f.args.len() == 1
                && matches!(source, DialectType::Doris | DialectType::StarRocks) => {
                let arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::Oracle | DialectType::DuckDB | DialectType::TSQL => {
                        // CAST(x AS DATE)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: vec![],
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::MySQL | DialectType::SingleStore => {
                        // DATE(x)
                        Ok(Expression::Function(Box::new(Function::new("DATE".to_string(), vec![arg]))))
                    }
                    _ => {
                        // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
                        Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
                    }
                }
            }
            // TO_DATE(x) 1-arg from Spark/Hive: Spark's TO_DATE returns NULL on parse
            // failure, so targets need a non-throwing conversion.
            "TO_DATE" if f.args.len() == 1
                && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
                let arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::DuckDB => {
                        // TRY_CAST(x AS DATE) preserves the NULL-on-failure behavior.
                        Ok(Expression::TryCast(Box::new(Cast {
                            this: arg,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: vec![],
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // CAST(CAST(x AS TIMESTAMP) AS DATE)
                        Ok(Self::double_cast_timestamp_date(arg))
                    }
                    DialectType::Snowflake => {
                        // TRY_TO_DATE(x, 'yyyy-mm-DD'): the chosen format element spelling
                        // is intended to match Spark's default 'yyyy-MM-dd'.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TRY_TO_DATE".to_string(),
                            vec![arg, Expression::string("yyyy-mm-DD")],
                        ))))
                    }
                    _ => {
                        // Default: keep as TO_DATE(x)
                        Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![arg]))))
                    }
                }
            }
10451 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
10452 "TO_DATE" if f.args.len() == 2
10453 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
10454 let mut args = f.args;
10455 let val = args.remove(0);
10456 let fmt_expr = args.remove(0);
10457 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
10458
10459 if is_default_format {
10460 // Default format: same as 1-arg form
10461 match target {
10462 DialectType::DuckDB => {
10463 Ok(Expression::TryCast(Box::new(Cast {
10464 this: val,
10465 to: DataType::Date,
10466 double_colon_syntax: false,
10467 trailing_comments: vec![],
10468 format: None,
10469 default: None,
10470 })))
10471 }
10472 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10473 Ok(Self::double_cast_timestamp_date(val))
10474 }
10475 DialectType::Snowflake => {
10476 // TRY_TO_DATE(x, format) with Snowflake format mapping
10477 let sf_fmt = "yyyy-MM-dd".replace("yyyy", "yyyy").replace("MM", "mm").replace("dd", "DD");
10478 Ok(Expression::Function(Box::new(Function::new(
10479 "TRY_TO_DATE".to_string(),
10480 vec![val, Expression::string(&sf_fmt)],
10481 ))))
10482 }
10483 _ => {
10484 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val]))))
10485 }
10486 }
10487 } else {
10488 // Non-default format: use format-based parsing
10489 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
10490 match target {
10491 DialectType::DuckDB => {
10492 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
10493 fn java_to_c_fmt_todate(fmt: &str) -> String {
10494 let result = fmt
10495 .replace("yyyy", "%Y")
10496 .replace("SSSSSS", "%f")
10497 .replace("EEEE", "%W")
10498 .replace("MM", "%m")
10499 .replace("dd", "%d")
10500 .replace("HH", "%H")
10501 .replace("mm", "%M")
10502 .replace("ss", "%S")
10503 .replace("yy", "%y");
10504 let mut out = String::new();
10505 let chars: Vec<char> = result.chars().collect();
10506 let mut i = 0;
10507 while i < chars.len() {
10508 if chars[i] == '%' && i + 1 < chars.len() {
10509 out.push(chars[i]);
10510 out.push(chars[i + 1]);
10511 i += 2;
10512 } else if chars[i] == 'z' {
10513 out.push_str("%Z");
10514 i += 1;
10515 } else if chars[i] == 'Z' {
10516 out.push_str("%z");
10517 i += 1;
10518 } else {
10519 out.push(chars[i]);
10520 i += 1;
10521 }
10522 }
10523 out
10524 }
10525 let c_fmt = java_to_c_fmt_todate(s);
10526 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
10527 let try_strptime = Expression::Function(Box::new(Function::new(
10528 "TRY_STRPTIME".to_string(),
10529 vec![val, Expression::string(&c_fmt)],
10530 )));
10531 let cast_ts = Expression::Cast(Box::new(Cast {
10532 this: try_strptime,
10533 to: DataType::Timestamp { precision: None, timezone: false },
10534 double_colon_syntax: false,
10535 trailing_comments: vec![],
10536 format: None,
10537 default: None,
10538 }));
10539 Ok(Expression::Cast(Box::new(Cast {
10540 this: cast_ts,
10541 to: DataType::Date,
10542 double_colon_syntax: false,
10543 trailing_comments: vec![],
10544 format: None,
10545 default: None,
10546 })))
10547 }
10548 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10549 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
10550 let p_fmt = s
10551 .replace("yyyy", "%Y")
10552 .replace("SSSSSS", "%f")
10553 .replace("MM", "%m")
10554 .replace("dd", "%d")
10555 .replace("HH", "%H")
10556 .replace("mm", "%M")
10557 .replace("ss", "%S")
10558 .replace("yy", "%y");
10559 let date_parse = Expression::Function(Box::new(Function::new(
10560 "DATE_PARSE".to_string(),
10561 vec![val, Expression::string(&p_fmt)],
10562 )));
10563 Ok(Expression::Cast(Box::new(Cast {
10564 this: date_parse,
10565 to: DataType::Date,
10566 double_colon_syntax: false,
10567 trailing_comments: vec![],
10568 format: None,
10569 default: None,
10570 })))
10571 }
10572 DialectType::Snowflake => {
10573 // TRY_TO_DATE(x, snowflake_fmt)
10574 Ok(Expression::Function(Box::new(Function::new(
10575 "TRY_TO_DATE".to_string(),
10576 vec![val, Expression::string(s)],
10577 ))))
10578 }
10579 _ => {
10580 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10581 }
10582 }
10583 } else {
10584 Ok(Expression::Function(Box::new(Function::new("TO_DATE".to_string(), vec![val, fmt_expr]))))
10585 }
10586 }
10587 }
// TO_TIMESTAMP(x) 1-arg: epoch conversion.
// DuckDB's 1-arg TO_TIMESTAMP interprets x as Unix seconds; rename to the
// target's epoch-seconds constructor (BigQuery TIMESTAMP_SECONDS, otherwise
// FROM_UNIXTIME). The `_` arm is unreachable given the guard but keeps the
// match total.
"TO_TIMESTAMP" if f.args.len() == 1
    && matches!(source, DialectType::DuckDB)
    && matches!(target, DialectType::BigQuery | DialectType::Presto | DialectType::Trino
        | DialectType::Hive | DialectType::Spark | DialectType::Databricks | DialectType::Athena) => {
    let arg = f.args.into_iter().next().unwrap();
    let func_name = match target {
        DialectType::BigQuery => "TIMESTAMP_SECONDS",
        DialectType::Presto | DialectType::Trino | DialectType::Athena
        | DialectType::Hive | DialectType::Spark | DialectType::Databricks => "FROM_UNIXTIME",
        _ => "TO_TIMESTAMP",
    };
    Ok(Expression::Function(Box::new(Function::new(func_name.to_string(), vec![arg]))))
}
// CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
// COALESCE to '' presumably keeps the single-argument CONCAT null-safe on
// Spark — TODO confirm against the Spark generator's tests.
"CONCAT" if f.args.len() == 1
    && matches!(target, DialectType::Spark | DialectType::Databricks) => {
    let arg = f.args.into_iter().next().unwrap();
    let coalesced = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
        expressions: vec![arg, Expression::string("")],
        original_name: None,
    }));
    Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), vec![coalesced]))))
}
// REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
// Only a literal numeric group index of exactly "0" is dropped; any other
// index, or a non-literal expression, passes through unchanged.
"REGEXP_EXTRACT" if f.args.len() == 3
    && matches!(target, DialectType::BigQuery) => {
    // If group_index is 0, drop it
    let drop_group = match &f.args[2] {
        Expression::Literal(Literal::Number(n)) => n == "0",
        _ => false,
    };
    if drop_group {
        let mut args = f.args;
        args.truncate(2);
        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
    } else {
        Ok(Expression::Function(f))
    }
}
// REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
// Snowflake's argument order is (subject, pattern, position, occurrence,
// flags, group): position and occurrence are defaulted to 1 here and the
// group index moves to the end.
"REGEXP_EXTRACT" if f.args.len() == 4
    && matches!(target, DialectType::Snowflake) => {
    // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
    let mut args = f.args;
    let this = args.remove(0);
    let pattern = args.remove(0);
    let group = args.remove(0);
    let flags = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "REGEXP_SUBSTR".to_string(),
        vec![this, pattern, Expression::number(1), Expression::number(1), flags, group],
    ))))
}
10642 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
10643 "REGEXP_SUBSTR" if f.args.len() == 3
10644 && matches!(target, DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Spark | DialectType::Databricks) => {
10645 let mut args = f.args;
10646 let this = args.remove(0);
10647 let pattern = args.remove(0);
10648 let position = args.remove(0);
10649 // Wrap subject in SUBSTRING(this, position) to apply the offset
10650 let substring_expr = Expression::Function(Box::new(Function::new(
10651 "SUBSTRING".to_string(),
10652 vec![this, position],
10653 )));
10654 let target_name = match target {
10655 DialectType::DuckDB => "REGEXP_EXTRACT",
10656 _ => "REGEXP_EXTRACT",
10657 };
10658 Ok(Expression::Function(Box::new(Function::new(
10659 target_name.to_string(),
10660 vec![substring_expr, pattern],
10661 ))))
10662 }
// TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
// MySQL's TO_DAYS(d) is the day number of d counted from year 0; a day
// difference against '0000-01-01' plus one reproduces that origin.
"TO_DAYS" if f.args.len() == 1 => {
    let x = f.args.into_iter().next().unwrap();
    let epoch = Expression::string("0000-01-01");
    // Build the final target-specific expression directly
    let datediff_expr = match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
        }
        DialectType::DuckDB => {
            // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
            let cast_epoch = Expression::Cast(Box::new(Cast {
                this: epoch, to: DataType::Date,
                trailing_comments: Vec::new(), double_colon_syntax: false,
                format: None, default: None,
            }));
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x, to: DataType::Date,
                trailing_comments: Vec::new(), double_colon_syntax: false,
                format: None, default: None,
            }));
            Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
                Expression::string("DAY"), cast_epoch, cast_x,
            ])))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
            // The double cast routes string inputs through TIMESTAMP first.
            let cast_epoch = Self::double_cast_timestamp_date(epoch);
            let cast_x = Self::double_cast_timestamp_date(x);
            Expression::Function(Box::new(Function::new("DATE_DIFF".to_string(), vec![
                Expression::string("DAY"), cast_epoch, cast_x,
            ])))
        }
        _ => {
            // Default: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new("DATEDIFF".to_string(), vec![x, epoch])))
        }
    };
    // Parenthesize so the +1 binds correctly inside larger expressions.
    let add_one = Expression::Add(Box::new(BinaryOp::new(datediff_expr, Expression::number(1))));
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: add_one,
        trailing_comments: Vec::new(),
    })))
}
// STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
// Presto/Trino: DATE_PARSE always yields a TIMESTAMP; when the format has no
// time tokens, cast the result down to DATE.
"STR_TO_DATE" if f.args.len() == 2
    && matches!(target, DialectType::Presto | DialectType::Trino) => {
    let mut args = f.args;
    let x = args.remove(0);
    let format_expr = args.remove(0);
    // Check if the format contains time components
    let has_time = if let Expression::Literal(Literal::String(ref fmt)) = format_expr {
        fmt.contains("%H") || fmt.contains("%T") || fmt.contains("%M") || fmt.contains("%S")
            || fmt.contains("%I") || fmt.contains("%p")
    } else {
        // Non-literal format: cannot inspect it, treat as date-only.
        false
    };
    let date_parse = Expression::Function(Box::new(Function::new(
        "DATE_PARSE".to_string(),
        vec![x, format_expr],
    )));
    if has_time {
        // Has time components: just DATE_PARSE
        Ok(date_parse)
    } else {
        // Date-only: CAST(DATE_PARSE(...) AS DATE)
        Ok(Expression::Cast(Box::new(Cast {
            this: date_parse,
            to: DataType::Date,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    }
}
// STR_TO_DATE for PostgreSQL/Redshift: rewrite MySQL %-tokens into the
// TO_DATE template language, then cast to TIMESTAMP.
// NOTE(review): the TIMESTAMP cast presumably mirrors MySQL STR_TO_DATE
// returning a DATETIME — confirm against the generator's expectations.
"STR_TO_DATE" if f.args.len() == 2
    && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => {
    let mut args = f.args;
    let x = args.remove(0);
    let fmt = args.remove(0);
    let pg_fmt = match fmt {
        Expression::Literal(Literal::String(s)) => {
            Expression::string(
                &s.replace("%Y", "YYYY")
                    .replace("%m", "MM")
                    .replace("%d", "DD")
                    .replace("%H", "HH24")
                    .replace("%M", "MI")
                    .replace("%S", "SS")
            )
        }
        // Non-literal format passes through untouched.
        other => other,
    };
    let to_date = Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![x, pg_fmt],
    )));
    Ok(Expression::Cast(Box::new(Cast {
        this: to_date,
        to: DataType::Timestamp { timezone: false, precision: None },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
    })))
}
10771 // RANGE(start, end) -> GENERATE_SERIES for SQLite
10772 "RANGE" if (f.args.len() == 1 || f.args.len() == 2)
10773 && matches!(target, DialectType::SQLite) => {
10774 if f.args.len() == 2 {
10775 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
10776 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
10777 let mut args = f.args;
10778 let start = args.remove(0);
10779 let end = args.remove(0);
10780 Ok(Expression::Function(Box::new(Function::new("GENERATE_SERIES".to_string(), vec![start, end]))))
10781 } else {
10782 Ok(Expression::Function(f))
10783 }
10784 }
// UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
// When source is Snowflake, keep as-is (args already in correct form)
"UNIFORM" if matches!(target, DialectType::Snowflake) && (f.args.len() == 2 || f.args.len() == 3) => {
    if matches!(source, DialectType::Snowflake) {
        // Snowflake -> Snowflake: keep as-is
        Ok(Expression::Function(f))
    } else {
        // Snowflake's UNIFORM requires a generator as the third argument:
        // wrap the optional seed in RANDOM(seed), or RANDOM() if absent.
        let mut args = f.args;
        let low = args.remove(0);
        let high = args.remove(0);
        let random = if !args.is_empty() {
            let seed = args.remove(0);
            Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![seed])))
        } else {
            Expression::Function(Box::new(Function::new("RANDOM".to_string(), vec![])))
        };
        Ok(Expression::Function(Box::new(Function::new("UNIFORM".to_string(), vec![low, high, random]))))
    }
}
// TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
// Interprets ts as wall-clock time in zone tz and converts it to UTC.
"TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
    let mut args = f.args;
    let ts_arg = args.remove(0);
    let tz_arg = args.remove(0);
    // Cast string literal to TIMESTAMP for all targets
    let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
        Expression::Cast(Box::new(Cast {
            this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
        }))
    } else { ts_arg };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Native function on these targets; only the literal cast applies.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Snowflake => {
            // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(), vec![tz_arg, Expression::string("UTC"), ts_cast],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
            let wtz = Expression::Function(Box::new(Function::new(
                "WITH_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
            )));
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: wtz, zone: Expression::string("UTC"),
            })))
        }
        DialectType::BigQuery => {
            // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
            // Unwraps the TIMESTAMP cast added above: BigQuery needs the raw
            // value cast to DATETIME instead.
            // NOTE(review): the `else` branch reuses ts_cast after the
            // if-let scrutinee may have moved it — verify this compiles as
            // written (a `match` with a binding would be the usual shape).
            let cast_dt = Expression::Cast(Box::new(Cast {
                this: if let Expression::Cast(c) = ts_cast { c.this } else { ts_cast.clone() },
                to: DataType::Custom { name: "DATETIME".to_string() },
                trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
            }));
            let ts_func = Expression::Function(Box::new(Function::new(
                "TIMESTAMP".to_string(), vec![cast_dt, tz_arg],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIME".to_string(), vec![ts_func, Expression::string("UTC")],
            ))))
        }
        _ => {
            // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
            let atz1 = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: ts_cast, zone: tz_arg,
            }));
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: atz1, zone: Expression::string("UTC"),
            })))
        }
    }
}
// FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
// Inverse of TO_UTC_TIMESTAMP: interprets ts as UTC and renders it in tz.
"FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
    let mut args = f.args;
    let ts_arg = args.remove(0);
    let tz_arg = args.remove(0);
    // Cast string literal to TIMESTAMP
    let ts_cast = if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
        Expression::Cast(Box::new(Cast {
            this: ts_arg, to: DataType::Timestamp { timezone: false, precision: None },
            trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
        }))
    } else { ts_arg };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Native function on these targets.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UTC_TIMESTAMP".to_string(), vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
            Ok(Expression::Function(Box::new(Function::new(
                "AT_TIMEZONE".to_string(), vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Snowflake => {
            // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(), vec![Expression::string("UTC"), tz_arg, ts_cast],
            ))))
        }
        _ => {
            // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: ts_cast, zone: tz_arg,
            })))
        }
    }
}
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
// NOTE(review): Snowflake's OBJECT_CONSTRUCT takes alternating key/value
// arguments rather than two arrays — confirm the Snowflake generator
// reshapes the argument list.
"MAP_FROM_ARRAYS" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Snowflake => "OBJECT_CONSTRUCT",
        _ => "MAP",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
// STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
// Other targets keep STR_TO_MAP unchanged.
"STR_TO_MAP" if f.args.len() >= 1 => {
    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new("SPLIT_TO_MAP".to_string(), f.args))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
// Only a literal format string is preserved; any other expression falls
// back to the default "%Y-%m-%d %H:%M:%S" pattern.
"TIME_TO_STR" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    let format = if let Expression::Literal(Literal::String(s)) = fmt_expr {
        s
    } else {
        "%Y-%m-%d %H:%M:%S".to_string()
    };
    Ok(Expression::TimeToStr(Box::new(crate::expressions::TimeToStr {
        this: Box::new(this),
        format,
        culture: None,
        zone: None,
    })))
}
// MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // Reproduce Oracle/Spark MONTHS_BETWEEN semantics: whole-month
            // difference plus a 31-day-based fraction, except when both
            // dates fall on the last day of their month (then the fraction
            // is 0).
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
            )));
            let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
            let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
            let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
            let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
            let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
            let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
            let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
            // Parenthesize (DAY(end) - DAY(start)) so the division binds right.
            let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                this: day_diff, trailing_comments: Vec::new(),
            }));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Literal::Number("31.0".to_string())),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end): whole months only.
            // NOTE(review): this drops the fractional part MONTHS_BETWEEN
            // carries — confirm the approximation is intended.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(), vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end): unit is a string literal here.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
// Drop the roundOff arg for non-Spark targets, keep it for Spark
"MONTHS_BETWEEN" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(f))
        }
        _ => {
            // Drop the 3rd arg and delegate to the 2-arg logic
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Re-create as 2-arg and process
            let f2 = Function::new("MONTHS_BETWEEN".to_string(), vec![end_date, start_date]);
            let e2 = Expression::Function(Box::new(f2));
            Self::cross_dialect_normalize(e2, source, target)
        }
    }
}
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
// Guard is on `source`: Spark-family TO_TIMESTAMP(x) parses a timestamp
// string (unlike DuckDB's epoch form handled in an earlier arm).
"TO_TIMESTAMP" if f.args.len() == 1
    && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg, to: DataType::Timestamp { timezone: false, precision: None },
        trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
    })))
}
// STRING(x) -> CAST(x AS STRING) for Spark target
// Guard is on `source`: Spark's STRING(x) is a cast-style function; the
// cast's target type is STRING for Spark-family targets and TEXT otherwise.
"STRING" if f.args.len() == 1
    && matches!(source, DialectType::Spark | DialectType::Databricks) => {
    let arg = f.args.into_iter().next().unwrap();
    let dt = match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            DataType::Custom { name: "STRING".to_string() }
        }
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg, to: dt,
        trailing_comments: vec![], double_colon_syntax: false, format: None, default: None,
    })))
}
// LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
// All other targets keep the LOGICAL_OR name.
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => "LOGICAL_OR",
    };
    Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
}
11042 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
11043 "SPLIT" if f.args.len() == 2
11044 && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => {
11045 let name = match target {
11046 DialectType::DuckDB => "STR_SPLIT_REGEX",
11047 DialectType::Presto | DialectType::Trino | DialectType::Athena => "REGEXP_SPLIT",
11048 DialectType::Spark | DialectType::Databricks | DialectType::Hive => "SPLIT",
11049 _ => "SPLIT",
11050 };
11051 Ok(Expression::Function(Box::new(Function::new(name.to_string(), f.args))))
11052 }
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
// NOTE(review): both rewrites presumably preserve the NULL-on-out-of-bounds
// behavior of TRY_ELEMENT_AT — confirm DuckDB subscripting and Presto
// ELEMENT_AT semantics match.
"TRY_ELEMENT_AT" if f.args.len() == 2 => {
    match target {
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new("ELEMENT_AT".to_string(), f.args))))
        }
        DialectType::DuckDB => {
            // Rewrite as a subscript expression: arr[idx].
            let mut args = f.args;
            let arr = args.remove(0);
            let idx = args.remove(0);
            Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
                this: arr,
                index: idx,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// Any function name not matched above passes through unchanged.
_ => Ok(Expression::Function(f)),
}
} else if let Expression::AggregateFunction(mut af) = e {
    // Aggregate-function renames.
    let name = af.name.to_uppercase();
    match name.as_str() {
        // ARBITRARY(x): pick-any aggregate; convert_arbitrary chooses the
        // target's equivalent spelling.
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL
                    // (only where the input left NULL placement unspecified).
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
    match target {
        DialectType::PostgreSQL => {
            // Flatten the node's optional ORDER BY expression into the
            // aggregate's order_by list, defaulting NULLS FIRST as above.
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                name: "JSON_AGG".to_string(),
                args: vec![*ja.this],
                distinct: false,
                filter: None,
                order_by,
                limit: None,
                ignore_nulls: None,
            })))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double { precision: None, scale: None },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
    })))
} else {
    // Anything else is left untouched by this action.
    Ok(e)
}
11138 }
11139
11140 Action::RegexpLikeToDuckDB => {
11141 if let Expression::RegexpLike(f) = e {
11142 let mut args = vec![f.this, f.pattern];
11143 if let Some(flags) = f.flags {
11144 args.push(flags);
11145 }
11146 Ok(Expression::Function(Box::new(Function::new(
11147 "REGEXP_MATCHES".to_string(),
11148 args,
11149 ))))
11150 } else {
11151 Ok(e)
11152 }
11153 }
Action::EpochConvert => {
    // Epoch(x): seconds since the Unix epoch, rendered with the target's
    // native function name.
    if let Expression::Epoch(f) = e {
        let arg = f.this;
        let name = match target {
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => "UNIX_TIMESTAMP",
            DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
            // NOTE(review): TIME_TO_UNIX is not a BigQuery SQL function;
            // presumably the BigQuery generator rewrites this name
            // (BigQuery exposes UNIX_SECONDS) — confirm.
            DialectType::BigQuery => "TIME_TO_UNIX",
            _ => "EPOCH",
        };
        Ok(Expression::Function(Box::new(Function::new(name.to_string(), vec![arg]))))
    } else {
        Ok(e)
    }
}
Action::EpochMsConvert => {
    // EpochMs(x): milliseconds since the Unix epoch -> a timestamp, using
    // whatever the target offers (a millis constructor, or seconds-based
    // conversion after dividing by 10^3).
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                // Direct millisecond constructor.
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::BigQuery => {
                // Direct millisecond constructor.
                Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]))))
            }
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                // The DOUBLE cast preserves sub-second precision.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POW".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3))
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("FROM_UNIXTIME".to_string(), vec![div]))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new("POWER".to_string(), vec![Expression::number(10), Expression::number(3)]))),
                )));
                Ok(Expression::Function(Box::new(Function::new("TO_TIMESTAMP".to_string(), vec![div]))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom { name: "Nullable(Int64)".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("fromUnixTimestamp64Milli".to_string(), vec![cast_arg]))))
            }
            // Default (DuckDB and others): keep the EPOCH_MS call.
            _ => Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![arg])))),
        }
    } else {
        Ok(e)
    }
}
Action::TSQLTypeNormalize => {
    // Map T-SQL-specific data types onto portable equivalents for the
    // current target. Arms that return early leave the type unchanged.
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY: fixed 4 decimal places; DECIMAL(15,4) covers its range.
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal { precision: Some(15), scale: Some(4) }
            }
            // SMALLMONEY: smaller range, same 4-decimal scale.
            DataType::Custom { name } if name.eq_ignore_ascii_case("SMALLMONEY") => {
                DataType::Decimal { precision: Some(6), scale: Some(4) }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp { timezone: false, precision: None }
            }
            // REAL (as a custom spelling) and REAL-spelled floats both
            // normalize to plain FLOAT.
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            // NOTE(review): this arm precedes Float{precision: Some(p)}
            // below, so a REAL-spelled float with a precision skips the
            // FLOAT/DOUBLE boundary logic — confirm that is intended.
            DataType::Float { real_spelling: true, .. } => {
                DataType::Float { precision: None, scale: None, real_spelling: false }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom { name: "BLOB".to_string() }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("ROWVERSION") => {
                DataType::Custom { name: "BINARY".to_string() }
            }
            // UNIQUEIDENTIFIER: GUIDs render as 36 characters.
            DataType::Custom { name } if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Custom { name: "STRING".to_string() }
                    }
                    _ => DataType::VarChar { length: Some(36), parenthesized_length: true },
                }
            }
            // DATETIMEOFFSET: timezone-aware, except on targets with no
            // timezone-aware timestamp type.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIMEOFFSET") => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => DataType::Timestamp { timezone: true, precision: None },
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("DATETIME2(") => {
                // DATETIME2(n) -> TIMESTAMP
                DataType::Timestamp { timezone: false, precision: None }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("TIME(") => {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        DataType::Timestamp { timezone: false, precision: None }
                    }
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name } if name.to_uppercase().starts_with("NUMERIC") => {
                // Parse NUMERIC(p,s) back to Decimal(p,s)
                // Unparseable components degrade to None rather than erroring.
                let upper = name.to_uppercase();
                if let Some(inner) = upper.strip_prefix("NUMERIC(").and_then(|s| s.strip_suffix(')')) {
                    let parts: Vec<&str> = inner.split(',').collect();
                    let precision = parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale = parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal { precision: None, scale: None }
                } else {
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float { precision: Some(p), .. } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float { precision: None, scale: None, real_spelling: false }
                } else {
                    DataType::Double { precision: None, scale: None }
                }
            }
            // T-SQL TINYINT is unsigned (0..255): widen on targets whose
            // TINYINT is signed.
            DataType::TinyInt { .. } => {
                match target {
                    DialectType::DuckDB => DataType::Custom { name: "UTINYINT".to_string() },
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks => DataType::SmallInt { length: None },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            // INTEGER -> INT for Spark/Databricks
            // NOTE(review): the spelling is normalized for every target,
            // not just Spark/Databricks as the comment suggests — confirm.
            DataType::Int { length, integer_spelling: true } => {
                DataType::Int { length: *length, integer_spelling: false }
            }
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
Action::MySQLSafeDivide => {
    // Rewrite `a / b` into a division-by-zero-safe form: generally
    // `a / NULLIF(b, 0)`, with an optional CAST on the dividend so targets
    // with integer-division semantics produce a floating-point result.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        // (SQLite already yields NULL for division by zero).
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                // `real_spelling: true` makes the generator emit REAL, not FLOAT.
                to: DataType::Float { precision: None, scale: None, real_spelling: true },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0) so x / 0 evaluates to NULL instead of erroring.
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side; the spelling of the
        // floating-point type differs per target.
        let new_left = match target {
            DialectType::PostgreSQL | DialectType::Redshift | DialectType::Teradata => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    // DOUBLE PRECISION has no dedicated DataType variant, hence Custom.
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::Drill | DialectType::Trino | DialectType::Presto => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Double { precision: None, scale: None },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            DialectType::TSQL => {
                Expression::Cast(Box::new(Cast {
                    this: left,
                    to: DataType::Float { precision: None, scale: None, real_spelling: false },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            // Other targets keep the dividend unchanged.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(new_left, nullif_right))))
    } else {
        Ok(e)
    }
}
Action::AlterTableRenameStripSchema => {
    // ALTER TABLE ... RENAME TO <new>: strip schema/catalog qualifiers from
    // the rename target for dialects that reject a qualified new name.
    // NOTE(review): only the first action of the statement is inspected —
    // confirm a RENAME cannot appear in a later position of a multi-action ALTER.
    if let Expression::AlterTable(mut at) = e {
        if let Some(crate::expressions::AlterTableAction::RenameTable(ref mut new_tbl)) = at.actions.first_mut() {
            new_tbl.schema = None;
            new_tbl.catalog = None;
        }
        Ok(Expression::AlterTable(at))
    } else {
        Ok(e)
    }
}
Action::NullsOrdering => {
    // Fill in the source dialect's implied null ordering default.
    // This makes implicit null ordering explicit so the target generator
    // can correctly strip or keep it.
    //
    // Dialect null ordering categories:
    //   nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
    //     ASC -> NULLS LAST, DESC -> NULLS FIRST
    //   nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
    //     ASC -> NULLS FIRST, DESC -> NULLS LAST
    //   nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
    //     NULLS LAST always (both ASC and DESC)
    if let Expression::Ordered(mut o) = e {
        let is_asc = !o.desc;

        // Classify the source dialect's default category.
        let is_source_nulls_large = matches!(source,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_source_nulls_last = matches!(source,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol
        );

        // Determine target category to check if default matches
        let is_target_nulls_large = matches!(target,
            DialectType::Oracle | DialectType::PostgreSQL | DialectType::Redshift
            | DialectType::Snowflake
        );
        let is_target_nulls_last = matches!(target,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            | DialectType::Dremio | DialectType::Athena | DialectType::ClickHouse
            | DialectType::Drill | DialectType::Exasol
        );

        // Compute the implied nulls_first for source
        let source_nulls_first = if is_source_nulls_large {
            !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
        } else if is_source_nulls_last {
            false // NULLS LAST always
        } else {
            is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
        };

        // Compute the target's default (same three-way classification).
        let target_nulls_first = if is_target_nulls_large {
            !is_asc
        } else if is_target_nulls_last {
            false
        } else {
            is_asc
        };

        // Only add explicit nulls ordering if source and target defaults differ.
        // NOTE(review): an explicit nulls_first already present on `o` is
        // overwritten here — confirm this action only runs when it is None.
        if source_nulls_first != target_nulls_first {
            o.nulls_first = Some(source_nulls_first);
        }
        // If they match, leave nulls_first as None so the generator won't output it

        Ok(Expression::Ordered(o))
    } else {
        Ok(e)
    }
}
Action::StringAggConvert => {
    // Normalize STRING_AGG — both the bare call and the WITHIN GROUP
    // (ORDER BY ...) form — to the target dialect's native string
    // aggregation construct (GROUP_CONCAT, LISTAGG, or StringAgg variants).
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af) if af.name.eq_ignore_ascii_case("STRING_AGG") && af.args.len() >= 2 => {
                    (Some(af.args[0].clone()), Some(af.args[1].clone()), af.distinct)
                }
                // A plain Function node carries no DISTINCT flag, so default to false.
                Expression::Function(ref f) if f.name.eq_ignore_ascii_case("STRING_AGG") && f.args.len() >= 2 => {
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(crate::expressions::WithinGroup {
                            this: Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None, // order_by goes in WithinGroup, not StringAgg
                                distinct,
                                filter: None,
                                limit: None,
                            })),
                            order_by,
                        })))
                    }
                    DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support
                        Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: None,
                            distinct,
                            filter: None,
                        })))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                            this: x,
                            separator: Some(sep),
                            order_by: Some(order_by),
                            distinct,
                            filter: None,
                            limit: None,
                        })))
                    }
                }
            } else {
                // Not a recognized STRING_AGG shape: leave the WITHIN GROUP untouched.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            // Bare STRING_AGG (no WITHIN GROUP wrapper).
            match target {
                DialectType::MySQL | DialectType::SingleStore | DialectType::Doris | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                        this: sa.this,
                        separator: sa.separator,
                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                        distinct: sa.distinct,
                        filter: sa.filter,
                    })))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                        this: sa.this,
                        separator: sa.separator,
                        on_overflow: None,
                        order_by: sa.order_by,
                        distinct: sa.distinct,
                        filter: None,
                    })))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
11596 Action::GroupConcatConvert => {
11597 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
11598 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
11599 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
11600 if let Expression::Function(ref f) = expr {
11601 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11602 let mut result = f.args[0].clone();
11603 for arg in &f.args[1..] {
11604 result = Expression::Concat(Box::new(BinaryOp {
11605 left: result,
11606 right: arg.clone(),
11607 left_comments: vec![],
11608 operator_comments: vec![],
11609 trailing_comments: vec![],
11610 }));
11611 }
11612 return result;
11613 }
11614 }
11615 expr
11616 }
11617 fn expand_concat_to_plus(expr: Expression) -> Expression {
11618 if let Expression::Function(ref f) = expr {
11619 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11620 let mut result = f.args[0].clone();
11621 for arg in &f.args[1..] {
11622 result = Expression::Add(Box::new(BinaryOp {
11623 left: result,
11624 right: arg.clone(),
11625 left_comments: vec![],
11626 operator_comments: vec![],
11627 trailing_comments: vec![],
11628 }));
11629 }
11630 return result;
11631 }
11632 }
11633 expr
11634 }
11635 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
11636 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
11637 if let Expression::Function(ref f) = expr {
11638 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
11639 let new_args: Vec<Expression> = f.args.iter().map(|arg| {
11640 Expression::Cast(Box::new(crate::expressions::Cast {
11641 this: arg.clone(),
11642 to: crate::expressions::DataType::VarChar { length: None, parenthesized_length: false },
11643 trailing_comments: Vec::new(),
11644 double_colon_syntax: false,
11645 format: None,
11646 default: None,
11647 }))
11648 }).collect();
11649 return Expression::Function(Box::new(crate::expressions::Function::new(
11650 "CONCAT".to_string(),
11651 new_args,
11652 )));
11653 }
11654 }
11655 expr
11656 }
// Convert a GROUP_CONCAT node to the target dialect's equivalent
// aggregation, applying the CONCAT-expansion helpers above where the
// target's preferred concatenation syntax differs.
if let Expression::GroupConcat(gc) = e {
    match target {
        DialectType::Presto => {
            // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                this,
                distinct: gc.distinct,
                filter: gc.filter,
                order_by: gc.order_by.unwrap_or_default(),
                name: None,
                ignore_nulls: None,
                having_max: None,
                limit: None,
            }));
            Ok(Expression::ArrayJoin(Box::new(crate::expressions::ArrayJoinFunc {
                this: array_agg,
                separator: sep,
                null_replacement: None,
            })))
        }
        DialectType::Trino => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                this,
                separator: Some(sep),
                on_overflow: None,
                order_by: gc.order_by,
                distinct: gc.distinct,
                filter: gc.filter,
            })))
        }
        DialectType::PostgreSQL | DialectType::Redshift | DialectType::Snowflake
        | DialectType::DuckDB
        | DialectType::Hive | DialectType::ClickHouse => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a || b || c for || dialects
            let this = expand_concat_to_dpipe(gc.this);
            // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
            // (makes MySQL's implied null placement explicit; only fills in
            // orderings that have no explicit nulls_first already).
            let order_by = if target == DialectType::PostgreSQL {
                gc.order_by.map(|ords| {
                    ords.into_iter().map(|mut o| {
                        if o.nulls_first.is_none() {
                            if o.desc {
                                o.nulls_first = Some(false); // NULLS LAST
                            } else {
                                o.nulls_first = Some(true); // NULLS FIRST
                            }
                        }
                        o
                    }).collect()
                })
            } else {
                gc.order_by
            };
            Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                this,
                separator: Some(sep),
                order_by,
                distinct: gc.distinct,
                filter: gc.filter,
                limit: None,
            })))
        }
        DialectType::TSQL => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
            // TSQL doesn't support DISTINCT in STRING_AGG
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a + b + c for TSQL
            let this = expand_concat_to_plus(gc.this);
            Ok(Expression::StringAgg(Box::new(crate::expressions::StringAggFunc {
                this,
                separator: Some(sep),
                order_by: gc.order_by,
                distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                filter: gc.filter,
                limit: None,
            })))
        }
        DialectType::SQLite => {
            // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
            // SQLite GROUP_CONCAT doesn't support ORDER BY
            // Expand CONCAT(a,b,c) -> a || b || c
            let this = expand_concat_to_dpipe(gc.this);
            Ok(Expression::GroupConcat(Box::new(crate::expressions::GroupConcatFunc {
                this,
                separator: gc.separator,
                order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                distinct: gc.distinct,
                filter: gc.filter,
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            Ok(Expression::ListAgg(Box::new(crate::expressions::ListAggFunc {
                this: gc.this,
                separator: Some(sep),
                on_overflow: None,
                order_by: gc.order_by,
                distinct: gc.distinct,
                filter: None,
            })))
        }
        DialectType::MySQL | DialectType::SingleStore | DialectType::StarRocks => {
            // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
            if gc.separator.is_none() {
                let mut gc = gc;
                gc.separator = Some(Expression::string(","));
                Ok(Expression::GroupConcat(gc))
            } else {
                Ok(Expression::GroupConcat(gc))
            }
        }
        _ => Ok(Expression::GroupConcat(gc)),
    }
} else {
    Ok(e)
}
}
Action::TempTableHash => {
    // TSQL spells temp tables `#name`: strip the leading '#' wherever a
    // table name appears and mark CREATE TABLE as TEMPORARY instead.
    match e {
        Expression::CreateTable(mut ct) => {
            // TSQL #table -> TEMPORARY TABLE with # stripped from name
            let name = &ct.name.name.name;
            if name.starts_with('#') {
                ct.name.name.name = name.trim_start_matches('#').to_string();
            }
            // Set temporary flag
            // NOTE(review): temporary is set even when the name had no '#';
            // confirm this action is only dispatched for temp-table statements.
            ct.temporary = true;
            Ok(Expression::CreateTable(ct))
        }
        Expression::Table(mut tr) => {
            // Strip # from table references
            let name = &tr.name.name;
            if name.starts_with('#') {
                tr.name.name = name.trim_start_matches('#').to_string();
            }
            Ok(Expression::Table(tr))
        }
        Expression::DropTable(mut dt) => {
            // Strip # from DROP TABLE names (may list several tables)
            for table_ref in &mut dt.names {
                if table_ref.name.name.starts_with('#') {
                    table_ref.name.name = table_ref.name.name.trim_start_matches('#').to_string();
                }
            }
            Ok(Expression::DropTable(dt))
        }
        _ => Ok(e),
    }
}
Action::NvlClearOriginal => {
    // Drop the remembered source spelling on an NVL node so the generator
    // emits the target's canonical name instead of echoing the original.
    if let Expression::Nvl(mut f) = e {
        f.original_name = None;
        Ok(Expression::Nvl(f))
    } else {
        Ok(e)
    }
}
Action::HiveCastToTryCast => {
    // Convert Hive/Spark CAST to TRY_CAST for targets that support it,
    // adjusting the cast's target type for a couple of dialect pairs first.
    if let Expression::Cast(mut c) = e {
        // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
        // (Spark's TIMESTAMP is always timezone-aware)
        if matches!(target, DialectType::DuckDB)
            && matches!(source, DialectType::Spark | DialectType::Databricks)
            && matches!(c.to, DataType::Timestamp { timezone: false, .. })
        {
            c.to = DataType::Custom { name: "TIMESTAMPTZ".to_string() };
        }
        // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
        // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
        if matches!(target, DialectType::Databricks | DialectType::Spark)
            && matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive)
            && Self::has_varchar_char_type(&c.to)
        {
            c.to = Self::normalize_varchar_to_string(c.to);
        }
        // Same payload, different node kind: Cast -> TryCast.
        Ok(Expression::TryCast(c))
    } else {
        Ok(e)
    }
}
Action::XorExpand => {
    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
    // Snowflake: use BOOLXOR(a, b) instead
    if let Expression::Xor(xor) = e {
        // Collect all XOR operands from the node's three possible slots
        // (this, expression, and the trailing expressions list).
        let mut operands = Vec::new();
        if let Some(this) = xor.this {
            operands.push(*this);
        }
        if let Some(expr) = xor.expression {
            operands.push(*expr);
        }
        operands.extend(xor.expressions);

        // Snowflake: use BOOLXOR(a, b) — binary only; 3+-operand XOR falls
        // through to the generic expansion below.
        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
            let a = operands.remove(0);
            let b = operands.remove(0);
            return Ok(Expression::Function(Box::new(Function::new("BOOLXOR".to_string(), vec![a, b]))));
        }

        // Helper to build (a AND NOT b) OR (NOT a AND b); each NOT and AND
        // sub-expression is parenthesized to preserve evaluation order in
        // the generated SQL.
        let make_xor = |a: Expression, b: Expression| -> Expression {
            let not_b = Expression::Not(Box::new(crate::expressions::UnaryOp::new(b.clone())));
            let not_a = Expression::Not(Box::new(crate::expressions::UnaryOp::new(a.clone())));
            let left_and = Expression::And(Box::new(BinaryOp {
                left: a,
                right: Expression::Paren(Box::new(Paren { this: not_b, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            let right_and = Expression::And(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: not_a, trailing_comments: Vec::new() })),
                right: b,
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }));
            Expression::Or(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren { this: left_and, trailing_comments: Vec::new() })),
                right: Expression::Paren(Box::new(Paren { this: right_and, trailing_comments: Vec::new() })),
                left_comments: Vec::new(), operator_comments: Vec::new(), trailing_comments: Vec::new(),
            }))
        };

        // Fold left-associatively: a XOR b XOR c -> xor(xor(a, b), c).
        if operands.len() >= 2 {
            let mut result = make_xor(operands.remove(0), operands.remove(0));
            for operand in operands {
                result = make_xor(result, operand);
            }
            Ok(result)
        } else if operands.len() == 1 {
            // Degenerate single-operand XOR: just the operand itself.
            Ok(operands.remove(0))
        } else {
            // No operands - return FALSE (shouldn't happen)
            Ok(Expression::Boolean(crate::expressions::BooleanLiteral { value: false }))
        }
    } else {
        Ok(e)
    }
}
Action::DatePartUnquote => {
    // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
    // Convert the quoted string first arg to a bare Column/Identifier
    // (lowercased); non-string first args are left alone.
    if let Expression::Function(mut f) = e {
        if let Some(Expression::Literal(crate::expressions::Literal::String(s))) = f.args.first() {
            let bare_name = s.to_lowercase();
            f.args[0] = Expression::Column(crate::expressions::Column {
                name: Identifier::new(bare_name),
                table: None,
                join_mark: false,
                trailing_comments: Vec::new(),
            });
        }
        Ok(Expression::Function(f))
    } else {
        Ok(e)
    }
}
11923 Action::ArrayLengthConvert => {
11924 // Extract the argument from the expression
11925 let arg = match e {
11926 Expression::Cardinality(ref f) => f.this.clone(),
11927 Expression::ArrayLength(ref f) => f.this.clone(),
11928 Expression::ArraySize(ref f) => f.this.clone(),
11929 _ => return Ok(e),
11930 };
11931 match target {
11932 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
11933 Ok(Expression::Function(Box::new(Function::new("SIZE".to_string(), vec![arg]))))
11934 }
11935 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11936 Ok(Expression::Cardinality(Box::new(crate::expressions::UnaryFunc::new(arg))))
11937 }
11938 DialectType::BigQuery => {
11939 Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
11940 }
11941 DialectType::DuckDB => {
11942 Ok(Expression::ArrayLength(Box::new(crate::expressions::UnaryFunc::new(arg))))
11943 }
11944 DialectType::PostgreSQL | DialectType::Redshift => {
11945 // PostgreSQL ARRAY_LENGTH requires dimension arg
11946 Ok(Expression::Function(Box::new(Function::new("ARRAY_LENGTH".to_string(), vec![arg, Expression::number(1)]))))
11947 }
11948 _ => Ok(e), // Keep original
11949 }
11950 }
11951
Action::JsonExtractToArrow => {
    // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
    // The node itself is unchanged; only the generation flag flips.
    if let Expression::JsonExtract(mut f) = e {
        f.arrow_syntax = true;
        Ok(Expression::JsonExtract(f))
    } else {
        Ok(e)
    }
}
11961
Action::JsonExtractToGetJsonObject => {
    // Rewrite JSON_EXTRACT either to JSON_EXTRACT_PATH (PostgreSQL family,
    // path decomposed into individual keys) or GET_JSON_OBJECT (Hive/Spark,
    // JSONPath string kept as-is).
    if let Expression::JsonExtract(f) = e {
        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
            // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
            // Convert JSONPath to individual keys
            // ('$.a.b' -> keys 'a', 'b'); a path without the '$.' prefix
            // yields None and is passed through as a single argument.
            let extracted_keys: Option<Vec<String>> = if let Expression::Literal(Literal::String(ref s)) = f.path {
                s.strip_prefix("$.").map(|stripped| {
                    stripped.split('.').map(|k| k.to_string()).collect()
                })
            } else {
                None
            };
            let keys = if let Some(key_list) = extracted_keys {
                key_list.into_iter().map(|k| Expression::string(&k)).collect::<Vec<_>>()
            } else {
                vec![f.path]
            };
            let mut args = vec![f.this];
            args.extend(keys);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH".to_string(),
                args,
            ))))
        } else {
            // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
    } else {
        Ok(e)
    }
}
11996
Action::JsonExtractScalarToGetJsonObject => {
    // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
    // The path literal is passed through unmodified.
    if let Expression::JsonExtractScalar(f) = e {
        Ok(Expression::Function(Box::new(Function::new(
            "GET_JSON_OBJECT".to_string(),
            vec![f.this, f.path],
        ))))
    } else {
        Ok(e)
    }
}
12008
Action::JsonQueryValueConvert => {
    // JsonQuery/JsonValue -> target-specific
    // `is_query` records which node kind we started from; it only matters
    // for the default branch, which preserves the original function name.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // (JSON_QUERY handles objects/arrays, JSON_VALUE scalars; ISNULL
            // picks whichever one matched.)
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                this: f.this,
                path: f.path,
                returning: f.returning,
                arrow_syntax: true,
                hash_arrow_syntax: false,
                wrapper_option: f.wrapper_option,
                quotes_option: f.quotes_option,
                on_scalar_string: f.on_scalar_string,
                on_error: f.on_error,
            })))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") => {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => {
                    Expression::Function(Box::new(Function::new(
                        "PARSE_JSON".to_string(), vec![f.this],
                    )))
                }
            };
            let path_str = match &f.path {
                Expression::Literal(Literal::String(s)) => {
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Literal::String(stripped.to_string()))
                }
                // Non-literal paths are passed through unchanged.
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(), vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
12097
Action::JsonLiteralToJsonParse => {
    // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
    // The CAST wrapper is discarded entirely; only the inner operand survives.
    // NOTE(review): the cast's target type is not checked here — confirm the
    // dispatcher only routes JSON casts to this action.
    if let Expression::Cast(c) = e {
        let func_name = if matches!(target, DialectType::Snowflake) {
            "PARSE_JSON"
        } else {
            "JSON_PARSE"
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![c.this],
        ))))
    } else {
        Ok(e)
    }
}
12114
Action::AtTimeZoneConvert => {
    // AT TIME ZONE -> target-specific conversion
    if let Expression::AtTimeZone(atz) = e {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UTC_TIMESTAMP".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
            DialectType::Snowflake => {
                // CONVERT_TIMEZONE('zone', expr) — note the argument order is
                // reversed relative to the other targets (zone first).
                Ok(Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![atz.zone, atz.this],
                ))))
            }
            DialectType::BigQuery => {
                // TIMESTAMP(DATETIME(expr, 'zone'))
                let datetime_call = Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![atz.this, atz.zone],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![datetime_call],
                ))))
            }
            _ => {
                // Fallback: spell it as AT_TIMEZONE(expr, zone).
                Ok(Expression::Function(Box::new(Function::new(
                    "AT_TIMEZONE".to_string(),
                    vec![atz.this, atz.zone],
                ))))
            }
        }
    } else {
        Ok(e)
    }
}
12160
Action::DayOfWeekConvert => {
    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
    // (the arithmetic shifts Spark's Sunday-based numbering; parens are
    // built explicitly so the generated SQL keeps the intended grouping).
    if let Expression::DayOfWeek(f) = e {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ISODOW".to_string(),
                    vec![f.this],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                // ((DAYOFWEEK(x) % 7) + 1)
                let dayofweek = Expression::Function(Box::new(Function::new(
                    "DAYOFWEEK".to_string(),
                    vec![f.this],
                )));
                let modulo = Expression::Mod(Box::new(BinaryOp {
                    left: dayofweek,
                    right: Expression::number(7),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                let paren_mod = Expression::Paren(Box::new(Paren {
                    this: modulo,
                    trailing_comments: Vec::new(),
                }));
                let add_one = Expression::Add(Box::new(BinaryOp {
                    left: paren_mod,
                    right: Expression::number(1),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                }));
                Ok(Expression::Paren(Box::new(Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // Other targets keep the DayOfWeek node as-is.
            _ => Ok(Expression::DayOfWeek(f)),
        }
    } else {
        Ok(e)
    }
}
12206
Action::MaxByMinByConvert => {
    // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
    // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
    // Handle both Expression::Function and Expression::AggregateFunction
    // NOTE(review): any name that is not MAX_BY is treated as MIN_BY here —
    // assumes the dispatcher only routes MAX_BY/MIN_BY calls; confirm.
    let (is_max, args) = match &e {
        Expression::Function(f) => (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone()),
        Expression::AggregateFunction(af) => (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone()),
        _ => return Ok(e),
    };
    match target {
        DialectType::ClickHouse => {
            let name = if is_max { "argMax" } else { "argMin" };
            // ClickHouse argMax/argMin take exactly two arguments.
            let mut args = args;
            args.truncate(2);
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::DuckDB => {
            let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Name is kept; only a possible 3rd argument is dropped.
            let mut args = args;
            args.truncate(2);
            let name = if is_max { "MAX_BY" } else { "MIN_BY" };
            Ok(Expression::Function(Box::new(Function::new(name.to_string(), args))))
        }
        _ => Ok(e),
    }
}
12236
12237 Action::ElementAtConvert => {
12238 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
12239 let (arr, idx) = if let Expression::ElementAt(bf) = e {
12240 (bf.this, bf.expression)
12241 } else if let Expression::Function(ref f) = e {
12242 if f.args.len() >= 2 {
12243 if let Expression::Function(f) = e {
12244 let mut args = f.args;
12245 let arr = args.remove(0);
12246 let idx = args.remove(0);
12247 (arr, idx)
12248 } else {
12249 unreachable!("outer condition already matched Expression::Function")
12250 }
12251 } else {
12252 return Ok(e);
12253 }
12254 } else {
12255 return Ok(e);
12256 };
12257 match target {
12258 DialectType::PostgreSQL => {
12259 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
12260 let arr_expr = Expression::Paren(Box::new(Paren {
12261 this: arr,
12262 trailing_comments: vec![],
12263 }));
12264 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12265 this: arr_expr,
12266 index: idx,
12267 })))
12268 }
12269 DialectType::BigQuery => {
12270 // BigQuery: convert ARRAY[...] to bare [...] for subscript
12271 let arr_expr = match arr {
12272 Expression::ArrayFunc(af) => {
12273 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12274 expressions: af.expressions,
12275 bracket_notation: true,
12276 use_list_keyword: false,
12277 }))
12278 }
12279 other => other,
12280 };
12281 let safe_ordinal = Expression::Function(Box::new(Function::new(
12282 "SAFE_ORDINAL".to_string(),
12283 vec![idx],
12284 )));
12285 Ok(Expression::Subscript(Box::new(crate::expressions::Subscript {
12286 this: arr_expr,
12287 index: safe_ordinal,
12288 })))
12289 }
12290 _ => Ok(Expression::Function(Box::new(Function::new(
12291 "ELEMENT_AT".to_string(),
12292 vec![arr, idx],
12293 ))))
12294 }
12295 }
12296
12297 Action::CurrentUserParens => {
12298 // CURRENT_USER -> CURRENT_USER() for Snowflake
12299 Ok(Expression::Function(Box::new(Function::new(
12300 "CURRENT_USER".to_string(),
12301 vec![],
12302 ))))
12303 }
12304
Action::ArrayAggToCollectList => {
    // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark.
    // Python sqlglot's Hive.arrayagg_sql strips ORDER BY for simple cases
    // but preserves it when DISTINCT / IGNORE NULLS / LIMIT are present.
    match e {
        // Generic aggregate-call form: rename in place.
        Expression::AggregateFunction(mut af) => {
            // "Simple" = no modifier whose semantics would interact with ORDER BY.
            let is_simple = !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
            // COLLECT_LIST is unary: keep only the first parsed argument.
            let args = if af.args.is_empty() { vec![] } else { vec![af.args[0].clone()] };
            af.name = "COLLECT_LIST".to_string();
            af.args = args;
            if is_simple {
                af.order_by = Vec::new();
            }
            Ok(Expression::AggregateFunction(af))
        }
        // Dedicated ArrayAgg node: rebuild as an AggregateFunction carrying
        // the same modifiers.
        Expression::ArrayAgg(agg) => {
            let is_simple = !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
            Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                name: "COLLECT_LIST".to_string(),
                args: vec![agg.this.clone()],
                distinct: agg.distinct,
                filter: agg.filter.clone(),
                order_by: if is_simple { Vec::new() } else { agg.order_by.clone() },
                limit: agg.limit.clone(),
                ignore_nulls: agg.ignore_nulls,
            })))
        }
        _ => Ok(e),
    }
}
12335
12336 Action::ArraySyntaxConvert => {
12337 match e {
12338 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
12339 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
12340 Expression::ArrayFunc(arr) if !arr.bracket_notation => {
12341 Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12342 expressions: arr.expressions,
12343 bracket_notation: true,
12344 use_list_keyword: false,
12345 })))
12346 }
12347 // ARRAY(y) function style -> ArrayFunc for target dialect
12348 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
12349 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
12350 let bracket = matches!(target, DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks);
12351 Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12352 expressions: f.args,
12353 bracket_notation: bracket,
12354 use_list_keyword: false,
12355 })))
12356 }
12357 _ => Ok(e),
12358 }
12359 }
12360
12361 Action::CastToJsonForSpark => {
12362 // CAST(x AS JSON) -> TO_JSON(x) for Spark
12363 if let Expression::Cast(c) = e {
12364 Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![c.this]))))
12365 } else {
12366 Ok(e)
12367 }
12368 }
12369
Action::CastJsonToFromJson => {
    // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string)
    // for Spark, which parses and casts JSON in a single builtin.
    if let Expression::Cast(c) = e {
        // Unwrap ParseJson so FROM_JSON receives the raw string literal;
        // any other inner expression is forwarded unchanged.
        let literal_expr = if let Expression::ParseJson(pj) = c.this {
            pj.this
        } else {
            c.this
        };
        // Render the cast's target DataType in Spark's type-string syntax
        // for FROM_JSON's second argument.
        let type_str = Self::data_type_to_spark_string(&c.to);
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_JSON".to_string(),
            vec![literal_expr, Expression::Literal(Literal::String(type_str))],
        ))))
    } else {
        Ok(e)
    }
}
12389
Action::ToJsonConvert => {
    // TO_JSON(x) -> target-specific JSON-serialization spelling.
    if let Expression::ToJson(f) = e {
        let arg = f.this;
        match target {
            DialectType::Presto | DialectType::Trino => {
                // JSON_FORMAT(CAST(x AS JSON)) — Presto/Trino serialize via a
                // JSON-typed cast followed by JSON_FORMAT.
                let cast_json = Expression::Cast(Box::new(Cast {
                    this: arg,
                    // Custom type keeps the literal spelling "JSON" in output.
                    to: DataType::Custom { name: "JSON".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
            }
            DialectType::BigQuery => {
                // BigQuery's string-producing equivalent is TO_JSON_STRING.
                Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), vec![arg]))))
            }
            DialectType::DuckDB => {
                // CAST(TO_JSON(x) AS TEXT) — DuckDB's TO_JSON returns a JSON
                // value, so cast to TEXT for a string result.
                let to_json = Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_json,
                    to: DataType::Text,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // Other targets: rebuild the ToJson node unchanged.
            _ => Ok(Expression::ToJson(Box::new(crate::expressions::UnaryFunc { this: arg, original_name: None })))
        }
    } else {
        Ok(e)
    }
}
12428
12429 Action::VarianceToClickHouse => {
12430 if let Expression::Variance(f) = e {
12431 Ok(Expression::Function(Box::new(Function::new("varSamp".to_string(), vec![f.this]))))
12432 } else { Ok(e) }
12433 }
12434
12435 Action::StddevToClickHouse => {
12436 if let Expression::Stddev(f) = e {
12437 Ok(Expression::Function(Box::new(Function::new("stddevSamp".to_string(), vec![f.this]))))
12438 } else { Ok(e) }
12439 }
12440
12441 Action::ApproxQuantileConvert => {
12442 if let Expression::ApproxQuantile(aq) = e {
12443 let mut args = vec![*aq.this];
12444 if let Some(q) = aq.quantile { args.push(*q); }
12445 Ok(Expression::Function(Box::new(Function::new("APPROX_PERCENTILE".to_string(), args))))
12446 } else { Ok(e) }
12447 }
12448
12449 Action::DollarParamConvert => {
12450 if let Expression::Parameter(p) = e {
12451 Ok(Expression::Parameter(Box::new(crate::expressions::Parameter {
12452 name: p.name,
12453 index: p.index,
12454 style: crate::expressions::ParameterStyle::At,
12455 quoted: p.quoted,
12456 expression: p.expression,
12457 })))
12458 } else { Ok(e) }
12459 }
12460
Action::EscapeStringNormalize => {
    if let Expression::Literal(Literal::EscapeString(s)) = e {
        // Strip the "e:"/"E:" marker the tokenizer may have prefixed.
        let stripped = if s.starts_with("e:") || s.starts_with("E:") {
            s[2..].to_string()
        } else {
            s
        };
        // Re-escape control characters so they appear as literal backslash
        // sequences in the generated SQL.
        let normalized = stripped.replace('\n', "\\n").replace('\r', "\\r").replace('\t', "\\t");
        match target {
            DialectType::BigQuery => {
                // BigQuery: e'...' -> CAST(b'...' AS STRING).
                // Emit as Raw so the generator does not escape the bytes
                // literal a second time.
                let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
                Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
            }
            _ => {
                // Other targets keep the escape-string literal form.
                Ok(Expression::Literal(Literal::EscapeString(normalized)))
            }
        }
    } else { Ok(e) }
}
12483
12484 Action::StraightJoinCase => {
12485 // straight_join: keep lowercase for DuckDB, quote for MySQL
12486 if let Expression::Column(col) = e {
12487 if col.name.name == "STRAIGHT_JOIN" {
12488 let mut new_col = col;
12489 new_col.name.name = "straight_join".to_string();
12490 if matches!(target, DialectType::MySQL) {
12491 // MySQL: needs quoting since it's a reserved keyword
12492 new_col.name.quoted = true;
12493 }
12494 Ok(Expression::Column(new_col))
12495 } else {
12496 Ok(Expression::Column(col))
12497 }
12498 } else { Ok(e) }
12499 }
12500
12501 Action::TablesampleReservoir => {
12502 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
12503 if let Expression::TableSample(mut ts) = e {
12504 if let Some(ref mut sample) = ts.sample {
12505 sample.method = crate::expressions::SampleMethod::Reservoir;
12506 sample.explicit_method = true;
12507 }
12508 Ok(Expression::TableSample(ts))
12509 } else { Ok(e) }
12510 }
12511
Action::TablesampleSnowflakeStrip => {
    // Snowflake target: suppress the sampling method keyword and the PERCENT
    // unit that a non-Snowflake source dialect may have carried along.
    // The same flags live on two node shapes, so both are handled.
    match e {
        // Standalone TABLESAMPLE node.
        Expression::TableSample(mut ts) => {
            if let Some(ref mut sample) = ts.sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::TableSample(ts))
        }
        // Sample attached directly to a table reference.
        Expression::Table(mut t) => {
            if let Some(ref mut sample) = t.table_sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::Table(t))
        }
        _ => Ok(e),
    }
}
12534
12535 Action::FirstToAnyValue => {
12536 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
12537 if let Expression::First(mut agg) = e {
12538 agg.ignore_nulls = None;
12539 agg.name = Some("ANY_VALUE".to_string());
12540 Ok(Expression::AnyValue(agg))
12541 } else { Ok(e) }
12542 }
12543
12544 Action::ArrayIndexConvert => {
12545 // Subscript index: 1-based to 0-based for BigQuery
12546 if let Expression::Subscript(mut sub) = e {
12547 if let Expression::Literal(Literal::Number(ref n)) = sub.index {
12548 if let Ok(val) = n.parse::<i64>() {
12549 sub.index = Expression::Literal(Literal::Number((val - 1).to_string()));
12550 }
12551 }
12552 Ok(Expression::Subscript(sub))
12553 } else { Ok(e) }
12554 }
12555
12556 Action::AnyValueIgnoreNulls => {
12557 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
12558 if let Expression::AnyValue(mut av) = e {
12559 if av.ignore_nulls.is_none() {
12560 av.ignore_nulls = Some(true);
12561 }
12562 Ok(Expression::AnyValue(av))
12563 } else { Ok(e) }
12564 }
12565
12566 Action::BigQueryNullsOrdering => {
12567 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
12568 if let Expression::WindowFunction(mut wf) = e {
12569 for o in &mut wf.over.order_by {
12570 o.nulls_first = None;
12571 }
12572 Ok(Expression::WindowFunction(wf))
12573 } else if let Expression::Ordered(mut o) = e {
12574 o.nulls_first = None;
12575 Ok(Expression::Ordered(o))
12576 } else { Ok(e) }
12577 }
12578
12579 Action::SnowflakeFloatProtect => {
12580 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
12581 // Snowflake's target transform from converting it to DOUBLE.
12582 // Non-Snowflake sources should keep their FLOAT spelling.
12583 if let Expression::DataType(DataType::Float { .. }) = e {
12584 Ok(Expression::DataType(DataType::Custom { name: "FLOAT".to_string() }))
12585 } else { Ok(e) }
12586 }
12587
12588 Action::MysqlNullsOrdering => {
12589 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
12590 if let Expression::Ordered(mut o) = e {
12591 let nulls_last = o.nulls_first == Some(false);
12592 let desc = o.desc;
12593 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
12594 // If requested ordering matches default, just strip NULLS clause
12595 let matches_default = if desc {
12596 // DESC default is NULLS FIRST, so nulls_first=true matches
12597 o.nulls_first == Some(true)
12598 } else {
12599 // ASC default is NULLS LAST, so nulls_first=false matches
12600 nulls_last
12601 };
12602 if matches_default {
12603 o.nulls_first = None;
12604 Ok(Expression::Ordered(o))
12605 } else {
12606 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
12607 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
12608 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
12609 let null_val = if desc { 1 } else { 0 };
12610 let non_null_val = if desc { 0 } else { 1 };
12611 let _case_expr = Expression::Case(Box::new(Case {
12612 operand: None,
12613 whens: vec![(
12614 Expression::IsNull(Box::new(crate::expressions::IsNull {
12615 this: o.this.clone(),
12616 not: false,
12617 postfix_form: false,
12618 })),
12619 Expression::number(null_val),
12620 )],
12621 else_: Some(Expression::number(non_null_val)),
12622 }));
12623 o.nulls_first = None;
12624 // Return a tuple of [case_expr, ordered_expr]
12625 // We need to return both as part of the ORDER BY
12626 // But since transform_recursive processes individual expressions,
12627 // we can't easily add extra ORDER BY items here.
12628 // Instead, strip the nulls_first
12629 o.nulls_first = None;
12630 Ok(Expression::Ordered(o))
12631 }
12632 } else { Ok(e) }
12633 }
12634
Action::MysqlNullsLastRewrite => {
    // DuckDB -> MySQL: emulate DuckDB's NULLS LAST for ASC window ordering by
    // prepending `CASE WHEN expr IS NULL THEN 1 ELSE 0 END` as an extra sort
    // key (NULL rows sort after non-NULL rows).
    if let Expression::WindowFunction(mut wf) = e {
        // Rebuild the order_by list; each ASC item may expand into two items,
        // so the list is reconstructed rather than mutated in place.
        let mut new_order_by = Vec::new();
        for o in wf.over.order_by {
            if !o.desc {
                // ASC: DuckDB defaults to NULLS LAST, MySQL to NULLS FIRST —
                // the CASE key forces MySQL to match DuckDB.
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: o.this.clone(),
                            not: false,
                            postfix_form: false,
                        })),
                        Expression::Literal(Literal::Number("1".to_string())),
                    )],
                    else_: Some(Expression::Literal(Literal::Number("0".to_string()))),
                }));
                // The synthetic key goes first so it dominates the ordering.
                new_order_by.push(crate::expressions::Ordered {
                    this: case_expr,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                });
                // Original key follows, with the (unsupported) modifier removed.
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            } else {
                // DESC: both dialects place NULLs last (NULLs sort smallest),
                // so only the modifier needs stripping.
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            }
        }
        wf.over.order_by = new_order_by;
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}
12678
Action::RespectNullsConvert => {
    // SQLite: strip an explicit RESPECT NULLS (ignore_nulls == Some(false))
    // from FIRST_VALUE/LAST_VALUE inside a window, since SQLite does not
    // accept the clause and RESPECT NULLS is the default behavior anyway.
    if let Expression::WindowFunction(mut wf) = e {
        match &mut wf.this {
            Expression::FirstValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in
                    // the OVER clause, but that's handled by the generator's
                    // NULLS ordering.
                }
            }
            Expression::LastValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                }
            }
            _ => {}
        }
        Ok(Expression::WindowFunction(wf))
    } else { Ok(e) }
}
12700
12701 Action::CreateTableStripComment => {
12702 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
12703 if let Expression::CreateTable(mut ct) = e {
12704 for col in &mut ct.columns {
12705 col.comment = None;
12706 col.constraints.retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Comment(_)));
12707 // Also remove Comment from constraint_order
12708 col.constraint_order.retain(|c| !matches!(c, crate::expressions::ConstraintType::Comment));
12709 }
12710 // Strip properties (USING, PARTITIONED BY, etc.)
12711 ct.properties.clear();
12712 Ok(Expression::CreateTable(ct))
12713 } else { Ok(e) }
12714 }
12715
12716 Action::AlterTableToSpRename => {
12717 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
12718 if let Expression::AlterTable(ref at) = e {
12719 if let Some(crate::expressions::AlterTableAction::RenameTable(ref new_tbl)) = at.actions.first() {
12720 // Build the old table name using TSQL bracket quoting
12721 let old_name = if let Some(ref schema) = at.name.schema {
12722 if at.name.name.quoted || schema.quoted {
12723 format!("[{}].[{}]", schema.name, at.name.name.name)
12724 } else {
12725 format!("{}.{}", schema.name, at.name.name.name)
12726 }
12727 } else {
12728 if at.name.name.quoted {
12729 format!("[{}]", at.name.name.name)
12730 } else {
12731 at.name.name.name.clone()
12732 }
12733 };
12734 let new_name = new_tbl.name.name.clone();
12735 // EXEC sp_rename 'old_name', 'new_name'
12736 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
12737 Ok(Expression::Raw(crate::expressions::Raw { sql }))
12738 } else { Ok(e) }
12739 } else { Ok(e) }
12740 }
12741
Action::SnowflakeIntervalFormat => {
    // Snowflake interval syntax folds the unit into the quoted value:
    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR'.
    if let Expression::Interval(mut iv) = e {
        // Only rewrite when the value is a string literal and a simple unit
        // is attached; compound unit specs fall through unchanged.
        if let (Some(Expression::Literal(Literal::String(ref val))), Some(ref unit_spec)) = (&iv.this, &iv.unit) {
            let unit_str = match unit_spec {
                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                    match unit {
                        crate::expressions::IntervalUnit::Year => "YEAR",
                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                        crate::expressions::IntervalUnit::Month => "MONTH",
                        crate::expressions::IntervalUnit::Week => "WEEK",
                        crate::expressions::IntervalUnit::Day => "DAY",
                        crate::expressions::IntervalUnit::Hour => "HOUR",
                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                        crate::expressions::IntervalUnit::Second => "SECOND",
                        crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
                        crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
                    }
                }
                // Non-simple unit specs are left as-is (empty sentinel below).
                _ => "",
            };
            if !unit_str.is_empty() {
                // Merge "<value> <UNIT>" into the literal and drop the
                // separate unit so the generator emits the combined form.
                let combined = format!("{} {}", val, unit_str);
                iv.this = Some(Expression::Literal(Literal::String(combined)));
                iv.unit = None;
            }
        }
        Ok(Expression::Interval(iv))
    } else { Ok(e) }
}
12772
12773 Action::ArrayConcatBracketConvert => {
12774 // Expression::Array/ArrayFunc -> target-specific
12775 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
12776 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
12777 match e {
12778 Expression::Array(arr) => {
12779 if matches!(target, DialectType::Redshift) {
12780 Ok(Expression::Function(Box::new(Function::new(
12781 "ARRAY".to_string(), arr.expressions,
12782 ))))
12783 } else {
12784 Ok(Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
12785 expressions: arr.expressions,
12786 bracket_notation: false,
12787 use_list_keyword: false,
12788 })))
12789 }
12790 }
12791 Expression::ArrayFunc(arr) => {
12792 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
12793 if matches!(target, DialectType::Redshift) {
12794 Ok(Expression::Function(Box::new(Function::new(
12795 "ARRAY".to_string(), arr.expressions,
12796 ))))
12797 } else {
12798 Ok(Expression::ArrayFunc(arr))
12799 }
12800 }
12801 _ => Ok(e),
12802 }
12803 }
12804
12805 Action::BitAggFloatCast => {
12806 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
12807 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
12808 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
12809 let int_type = DataType::Int { length: None, integer_spelling: false };
12810 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
12811 if let Expression::Cast(c) = agg_this {
12812 match &c.to {
12813 DataType::Float { .. } | DataType::Double { .. }
12814 | DataType::Custom { .. } => {
12815 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
12816 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
12817 let inner_type = match &c.to {
12818 DataType::Float { precision, scale, .. } => DataType::Float { precision: *precision, scale: *scale, real_spelling: true },
12819 other => other.clone(),
12820 };
12821 let inner_cast = Expression::Cast(Box::new(crate::expressions::Cast {
12822 this: c.this.clone(),
12823 to: inner_type,
12824 trailing_comments: Vec::new(),
12825 double_colon_syntax: false,
12826 format: None,
12827 default: None,
12828 }));
12829 let rounded = Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![inner_cast])));
12830 Expression::Cast(Box::new(crate::expressions::Cast {
12831 this: rounded,
12832 to: int_dt,
12833 trailing_comments: Vec::new(),
12834 double_colon_syntax: false,
12835 format: None,
12836 default: None,
12837 }))
12838 }
12839 DataType::Decimal { .. } => {
12840 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
12841 Expression::Cast(Box::new(crate::expressions::Cast {
12842 this: Expression::Cast(c),
12843 to: int_dt,
12844 trailing_comments: Vec::new(),
12845 double_colon_syntax: false,
12846 format: None,
12847 default: None,
12848 }))
12849 }
12850 _ => Expression::Cast(c),
12851 }
12852 } else {
12853 agg_this
12854 }
12855 };
12856 match e {
12857 Expression::BitwiseOrAgg(mut f) => {
12858 f.this = wrap_agg(f.this, int_type);
12859 Ok(Expression::BitwiseOrAgg(f))
12860 }
12861 Expression::BitwiseAndAgg(mut f) => {
12862 let int_type = DataType::Int { length: None, integer_spelling: false };
12863 f.this = wrap_agg(f.this, int_type);
12864 Ok(Expression::BitwiseAndAgg(f))
12865 }
12866 Expression::BitwiseXorAgg(mut f) => {
12867 let int_type = DataType::Int { length: None, integer_spelling: false };
12868 f.this = wrap_agg(f.this, int_type);
12869 Ok(Expression::BitwiseXorAgg(f))
12870 }
12871 _ => Ok(e),
12872 }
12873 }
12874
12875 Action::BitAggSnowflakeRename => {
12876 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
12877 match e {
12878 Expression::BitwiseOrAgg(f) => {
12879 Ok(Expression::Function(Box::new(Function::new("BITORAGG".to_string(), vec![f.this]))))
12880 }
12881 Expression::BitwiseAndAgg(f) => {
12882 Ok(Expression::Function(Box::new(Function::new("BITANDAGG".to_string(), vec![f.this]))))
12883 }
12884 Expression::BitwiseXorAgg(f) => {
12885 Ok(Expression::Function(Box::new(Function::new("BITXORAGG".to_string(), vec![f.this]))))
12886 }
12887 _ => Ok(e),
12888 }
12889 }
12890
12891 Action::StrftimeCastTimestamp => {
12892 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
12893 if let Expression::Cast(mut c) = e {
12894 if matches!(c.to, DataType::Timestamp { timezone: false, .. }) {
12895 c.to = DataType::Custom { name: "TIMESTAMP_NTZ".to_string() };
12896 }
12897 Ok(Expression::Cast(c))
12898 } else { Ok(e) }
12899 }
12900
12901 Action::DecimalDefaultPrecision => {
12902 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
12903 if let Expression::Cast(mut c) = e {
12904 if matches!(c.to, DataType::Decimal { precision: None, .. }) {
12905 c.to = DataType::Decimal {
12906 precision: Some(18),
12907 scale: Some(3),
12908 };
12909 }
12910 Ok(Expression::Cast(c))
12911 } else { Ok(e) }
12912 }
12913
Action::FilterToIff => {
    // FILTER(WHERE cond) has no Snowflake equivalent: push the condition into
    // the aggregate's argument instead, e.g. AVG(x) FILTER(WHERE c) becomes
    // AVG(IFF(c, x, NULL)) — NULLs are ignored by the aggregate, matching
    // FILTER semantics.
    if let Expression::Filter(f) = e {
        let condition = *f.expression;
        let agg = *f.this;
        // The parser may wrap the condition in a Where node; unwrap it.
        let cond = match condition {
            Expression::Where(w) => w.this,
            other => other,
        };
        // Rewrite per aggregate node shape; the IFF wrapper is identical in
        // each case, only the field holding the argument differs.
        match agg {
            Expression::Function(mut func) => {
                if !func.args.is_empty() {
                    let orig_arg = func.args[0].clone();
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, orig_arg, Expression::Null(Null)],
                    )));
                    func.args[0] = iff_call;
                    Ok(Expression::Function(func))
                } else {
                    // Zero-argument call: nothing to wrap, keep the Filter node.
                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                        this: Box::new(Expression::Function(func)),
                        expression: Box::new(cond),
                    })))
                }
            }
            Expression::Avg(mut avg) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                )));
                avg.this = iff_call;
                Ok(Expression::Avg(avg))
            }
            Expression::Sum(mut s) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, s.this.clone(), Expression::Null(Null)],
                )));
                s.this = iff_call;
                Ok(Expression::Sum(s))
            }
            Expression::Count(mut c) => {
                // COUNT(*) has no argument to wrap; only COUNT(expr) is rewritten.
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
                Ok(Expression::Count(c))
            }
            other => {
                // Unrecognized aggregate shape: keep the original Filter form.
                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                    this: Box::new(other),
                    expression: Box::new(cond),
                })))
            }
        }
    } else { Ok(e) }
}
12979
Action::AggFilterToIff => {
    // Same IFF rewrite as FilterToIff, but for the filter stored directly on
    // an aggregate node: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL)).
    // A macro covers every aggregate variant that shares the `this`/`filter`
    // field shape; Count is special-cased because its argument is optional.
    macro_rules! handle_agg_filter_to_iff {
        ($variant:ident, $agg:expr) => {{
            let mut agg = $agg;
            // take() both extracts the condition and clears the filter field.
            if let Some(filter_cond) = agg.filter.take() {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                )));
                agg.this = iff_call;
            }
            Ok(Expression::$variant(agg))
        }};
    }

    match e {
        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
        Expression::ApproxDistinct(agg) => handle_agg_filter_to_iff!(ApproxDistinct, agg),
        Expression::Count(mut c) => {
            // COUNT's argument is Option: only wrap when COUNT(expr), not COUNT(*).
            if let Some(filter_cond) = c.filter.take() {
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![filter_cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
            }
            Ok(Expression::Count(c))
        }
        other => Ok(other),
    }
}
13031
Action::JsonToGetPath => {
    // JSON_EXTRACT(JSON('x'), '$.key') -> GET_PATH(PARSE_JSON('x'), 'key')
    // (Snowflake-style path access).
    if let Expression::JsonExtract(je) = e {
        // Rewrite a unary JSON(...) wrapper to PARSE_JSON(...).
        let this = match &je.this {
            Expression::Function(f) if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 => {
                Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), f.args.clone())))
            }
            _ => je.this.clone(),
        };
        // Convert the path argument: extract dotted keys from a structured
        // JSONPath, or strip the "$."/"$" prefix from a string literal.
        let path = match &je.path {
            Expression::JSONPath(jp) => {
                // Collect string keys, skipping the root marker: $.a.b -> "a.b".
                let mut key_parts = Vec::new();
                for expr in &jp.expressions {
                    match expr {
                        Expression::JSONPathRoot(_) => {} // skip root
                        Expression::JSONPathKey(k) => {
                            if let Expression::Literal(Literal::String(s)) = &*k.this {
                                key_parts.push(s.clone());
                            }
                        }
                        // Other path segments (wildcards, subscripts) are ignored;
                        // if nothing usable was found the original path is kept.
                        _ => {}
                    }
                }
                if !key_parts.is_empty() {
                    Expression::Literal(Literal::String(key_parts.join(".")))
                } else {
                    je.path.clone()
                }
            }
            Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                Expression::Literal(Literal::String(s[2..].to_string()))
            }
            Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                Expression::Literal(Literal::String(s[1..].to_string()))
            }
            _ => je.path.clone(),
        };
        Ok(Expression::Function(Box::new(Function::new(
            "GET_PATH".to_string(),
            vec![this, path],
        ))))
    } else { Ok(e) }
}
13078
13079 Action::StructToRow => {
13080 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
13081 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
13082
13083 // Extract key-value pairs from either Struct or MapFunc
13084 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
13085 Expression::Struct(s) => {
13086 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13087 if let Some(name) = opt_name {
13088 (name.clone(), field_expr.clone())
13089 } else if let Expression::NamedArgument(na) = field_expr {
13090 (na.name.name.clone(), na.value.clone())
13091 } else {
13092 (String::new(), field_expr.clone())
13093 }
13094 }).collect())
13095 }
13096 Expression::MapFunc(m) if m.curly_brace_syntax => {
13097 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13098 let key_name = match key {
13099 Expression::Literal(Literal::String(s)) => s.clone(),
13100 Expression::Identifier(id) => id.name.clone(),
13101 _ => String::new(),
13102 };
13103 (key_name, value.clone())
13104 }).collect())
13105 }
13106 _ => None,
13107 };
13108
13109 if let Some(pairs) = kv_pairs {
13110 let mut named_args = Vec::new();
13111 for (key_name, value) in pairs {
13112 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
13113 named_args.push(Expression::Alias(Box::new(
13114 crate::expressions::Alias::new(value, Identifier::new(key_name))
13115 )));
13116 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13117 named_args.push(value);
13118 } else {
13119 named_args.push(value);
13120 }
13121 }
13122
13123 if matches!(target, DialectType::BigQuery) {
13124 Ok(Expression::Function(Box::new(Function::new(
13125 "STRUCT".to_string(),
13126 named_args,
13127 ))))
13128 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
13129 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
13130 let row_func = Expression::Function(Box::new(Function::new(
13131 "ROW".to_string(),
13132 named_args,
13133 )));
13134
13135 // Try to infer types for each pair
13136 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
13137 Expression::Struct(s) => {
13138 Some(s.fields.iter().map(|(opt_name, field_expr)| {
13139 if let Some(name) = opt_name {
13140 (name.clone(), field_expr.clone())
13141 } else if let Expression::NamedArgument(na) = field_expr {
13142 (na.name.name.clone(), na.value.clone())
13143 } else {
13144 (String::new(), field_expr.clone())
13145 }
13146 }).collect())
13147 }
13148 Expression::MapFunc(m) if m.curly_brace_syntax => {
13149 Some(m.keys.iter().zip(m.values.iter()).map(|(key, value)| {
13150 let key_name = match key {
13151 Expression::Literal(Literal::String(s)) => s.clone(),
13152 Expression::Identifier(id) => id.name.clone(),
13153 _ => String::new(),
13154 };
13155 (key_name, value.clone())
13156 }).collect())
13157 }
13158 _ => None,
13159 };
13160
13161 if let Some(pairs) = kv_pairs_again {
13162 // Infer types for all values
13163 let mut all_inferred = true;
13164 let mut fields = Vec::new();
13165 for (name, value) in &pairs {
13166 let inferred_type = match value {
13167 Expression::Literal(Literal::Number(n)) => {
13168 if n.contains('.') {
13169 Some(DataType::Double { precision: None, scale: None })
13170 } else {
13171 Some(DataType::Int { length: None, integer_spelling: true })
13172 }
13173 }
13174 Expression::Literal(Literal::String(_)) => {
13175 Some(DataType::VarChar { length: None, parenthesized_length: false })
13176 }
13177 Expression::Boolean(_) => Some(DataType::Boolean),
13178 _ => None,
13179 };
13180 if let Some(dt) = inferred_type {
13181 fields.push(crate::expressions::StructField::new(name.clone(), dt));
13182 } else {
13183 all_inferred = false;
13184 break;
13185 }
13186 }
13187
13188 if all_inferred && !fields.is_empty() {
13189 let row_type = DataType::Struct { fields, nested: true };
13190 Ok(Expression::Cast(Box::new(Cast {
13191 this: row_func,
13192 to: row_type,
13193 trailing_comments: Vec::new(),
13194 double_colon_syntax: false,
13195 format: None,
13196 default: None,
13197 })))
13198 } else {
13199 Ok(row_func)
13200 }
13201 } else {
13202 Ok(row_func)
13203 }
13204 } else {
13205 Ok(Expression::Function(Box::new(Function::new(
13206 "ROW".to_string(),
13207 named_args,
13208 ))))
13209 }
13210 } else { Ok(e) }
13211 }
13212
Action::SparkStructConvert => {
    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
    // or DuckDB {'name': val, ...}
    if let Expression::Function(f) = e {
        // Extract name-value pairs from aliased args. Unaliased args keep an
        // empty-string name, which flows through to the target construct as-is.
        let mut pairs: Vec<(String, Expression)> = Vec::new();
        for arg in &f.args {
            match arg {
                Expression::Alias(a) => {
                    pairs.push((a.alias.name.clone(), a.this.clone()));
                }
                _ => {
                    pairs.push((String::new(), arg.clone()));
                }
            }
        }

        match target {
            DialectType::DuckDB => {
                // Convert to DuckDB struct literal {'name': value, ...}
                let mut keys = Vec::new();
                let mut values = Vec::new();
                for (name, value) in &pairs {
                    keys.push(Expression::Literal(Literal::String(name.clone())));
                    values.push(value.clone());
                }
                Ok(Expression::MapFunc(Box::new(crate::expressions::MapConstructor {
                    keys,
                    values,
                    curly_brace_syntax: true,
                    with_map_keyword: false,
                })))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                let row_args: Vec<Expression> = pairs.iter().map(|(_, v)| v.clone()).collect();
                let row_func = Expression::Function(Box::new(Function::new(
                    "ROW".to_string(), row_args,
                )));

                // Infer types. Coverage is intentionally narrow: numeric literals
                // (DOUBLE if they contain '.', else INT), string literals
                // (VARCHAR) and booleans. Anything else aborts inference.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(Literal::Number(n)) => {
                            if n.contains('.') {
                                Some(DataType::Double { precision: None, scale: None })
                            } else {
                                Some(DataType::Int { length: None, integer_spelling: true })
                            }
                        }
                        Expression::Literal(Literal::String(_)) => {
                            Some(DataType::VarChar { length: None, parenthesized_length: false })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(name.clone(), dt));
                    } else {
                        // One non-inferable value disables the CAST wrapper entirely.
                        all_inferred = false;
                        break;
                    }
                }

                if all_inferred && !fields.is_empty() {
                    let row_type = DataType::Struct { fields, nested: true };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Fall back to a bare ROW(...) when any field type is unknown.
                    Ok(row_func)
                }
            }
            // Other targets: leave the STRUCT call unchanged.
            _ => Ok(Expression::Function(f)),
        }
    } else { Ok(e) }
}
13297
13298 Action::ApproxCountDistinctToApproxDistinct => {
13299 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
13300 if let Expression::ApproxCountDistinct(f) = e {
13301 Ok(Expression::ApproxDistinct(f))
13302 } else {
13303 Ok(e)
13304 }
13305 }
13306
13307 Action::CollectListToArrayAgg => {
13308 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
13309 if let Expression::AggregateFunction(f) = e {
13310 let filter_expr = if !f.args.is_empty() {
13311 let arg = f.args[0].clone();
13312 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
13313 this: arg,
13314 not: true,
13315 postfix_form: false,
13316 })))
13317 } else {
13318 None
13319 };
13320 let agg = crate::expressions::AggFunc {
13321 this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
13322 distinct: f.distinct,
13323 order_by: f.order_by.clone(),
13324 filter: filter_expr,
13325 ignore_nulls: None,
13326 name: None,
13327 having_max: None,
13328 limit: None,
13329 };
13330 Ok(Expression::ArrayAgg(Box::new(agg)))
13331 } else {
13332 Ok(e)
13333 }
13334 }
13335
Action::CollectSetConvert => {
    // COLLECT_SET(x) -> target-specific
    if let Expression::AggregateFunction(f) = e {
        match target {
            DialectType::Presto => {
                // Presto spells it SET_AGG; all modifiers carry over except
                // DISTINCT, which is forced off (SET_AGG already dedupes).
                Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                    name: "SET_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                })))
            }
            DialectType::Snowflake => {
                // Snowflake equivalent: ARRAY_UNIQUE_AGG (also inherently distinct).
                Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
                    name: "ARRAY_UNIQUE_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                })))
            }
            DialectType::Trino | DialectType::DuckDB => {
                // Rewrite as ARRAY_AGG(DISTINCT x).
                // NOTE(review): the source aggregate's order_by/filter/limit are
                // dropped on this path (unlike Presto/Snowflake above) — confirm
                // that is intended.
                let agg = crate::expressions::AggFunc {
                    this: if f.args.is_empty() { Expression::Null(crate::expressions::Null) } else { f.args[0].clone() },
                    distinct: true,
                    order_by: Vec::new(),
                    filter: None,
                    ignore_nulls: None,
                    name: None,
                    having_max: None,
                    limit: None,
                };
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
            // Other targets keep COLLECT_SET unchanged.
            _ => Ok(Expression::AggregateFunction(f))
        }
    } else {
        Ok(e)
    }
}
13381
13382 Action::PercentileConvert => {
13383 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
13384 if let Expression::AggregateFunction(f) = e {
13385 let name = match target {
13386 DialectType::DuckDB => "QUANTILE",
13387 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
13388 _ => "PERCENTILE",
13389 };
13390 Ok(Expression::AggregateFunction(Box::new(crate::expressions::AggregateFunction {
13391 name: name.to_string(),
13392 args: f.args,
13393 distinct: f.distinct,
13394 order_by: f.order_by,
13395 filter: f.filter,
13396 limit: f.limit,
13397 ignore_nulls: f.ignore_nulls,
13398 })))
13399 } else {
13400 Ok(e)
13401 }
13402 }
13403
13404 Action::CorrIsnanWrap => {
13405 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
13406 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
13407 let corr_clone = e.clone();
13408 let isnan = Expression::Function(Box::new(Function::new(
13409 "ISNAN".to_string(), vec![corr_clone.clone()],
13410 )));
13411 let case_expr = Expression::Case(Box::new(Case {
13412 operand: None,
13413 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
13414 else_: Some(corr_clone),
13415 }));
13416 Ok(case_expr)
13417 }
13418
Action::TruncToDateTrunc => {
    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
    // Only two-argument TRUNC is a date-truncation; one-argument (numeric)
    // TRUNC passes through unchanged.
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let timestamp = f.args[0].clone();
            let unit_expr = f.args[1].clone();

            if matches!(target, DialectType::ClickHouse) {
                // For ClickHouse, produce Expression::DateTrunc which the generator
                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                // target transform that would convert it to dateTrunc
                let unit_str = Self::get_unit_str_static(&unit_expr);
                // Map the textual unit to the typed field; unrecognized units
                // are preserved via the Custom variant.
                let dt_field = match unit_str.as_str() {
                    "YEAR" => DateTimeField::Year,
                    "MONTH" => DateTimeField::Month,
                    "DAY" => DateTimeField::Day,
                    "HOUR" => DateTimeField::Hour,
                    "MINUTE" => DateTimeField::Minute,
                    "SECOND" => DateTimeField::Second,
                    "WEEK" => DateTimeField::Week,
                    "QUARTER" => DateTimeField::Quarter,
                    _ => DateTimeField::Custom(unit_str),
                };
                Ok(Expression::DateTrunc(Box::new(crate::expressions::DateTruncFunc {
                    this: timestamp,
                    unit: dt_field,
                })))
            } else {
                // Other targets: plain DATE_TRUNC(unit, timestamp) — note the
                // argument order is swapped relative to TRUNC.
                let new_args = vec![unit_expr, timestamp];
                Ok(Expression::Function(Box::new(Function::new("DATE_TRUNC".to_string(), new_args))))
            }
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
13457
13458 Action::ArrayContainsConvert => {
13459 if let Expression::ArrayContains(f) = e {
13460 match target {
13461 DialectType::Presto | DialectType::Trino => {
13462 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
13463 Ok(Expression::Function(Box::new(Function::new("CONTAINS".to_string(), vec![f.this, f.expression]))))
13464 }
13465 DialectType::Snowflake => {
13466 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
13467 let cast_val = Expression::Cast(Box::new(crate::expressions::Cast {
13468 this: f.expression,
13469 to: crate::expressions::DataType::Custom { name: "VARIANT".to_string() },
13470 trailing_comments: Vec::new(),
13471 double_colon_syntax: false,
13472 format: None,
13473 default: None,
13474 }));
13475 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONTAINS".to_string(), vec![cast_val, f.this]))))
13476 }
13477 _ => Ok(Expression::ArrayContains(f))
13478 }
13479 } else {
13480 Ok(e)
13481 }
13482 }
13483
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition { this, substr, position, occurrence } = *sp;
        let string = *this;
        // Missing search substring degrades to a NULL argument.
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Default start position is 1 (SQL string indexing is 1-based).
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(), vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(), vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1  — re-bases the match index from the substring
        // back onto the original string.
        let pos_adjusted = Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                strpos_call.clone(),
                pos.clone(),
            ))),
            Expression::number(1),
        )));
        // STRPOS(...) = 0  — "not found" guard so 0 is returned unadjusted.
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (is_zero, Expression::number(0)),
                    ],
                    else_: Some(pos_adjusted),
                })))
            }
            _ => {
                // Reconstruct StrPosition with the defaulted substr/position
                // filled in; the occurrence field is passed through untouched.
                Ok(Expression::StrPosition(Box::new(crate::expressions::StrPosition {
                    this: Box::new(string),
                    substr: Some(Box::new(substr_expr)),
                    position: Some(Box::new(pos)),
                    occurrence,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
13555
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) -> target-specific month difference.
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc { this: end_date, expression: start_date, .. } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; rebuild it as
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //          THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. whole months plus a /31 day fraction, with the fraction
                // zeroed when both dates are their month's last day.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), cast_start.clone(), cast_end.clone()],
                )));
                let day_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_end.clone()])));
                let day_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![cast_start.clone()])));
                let last_day_end = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_end.clone()])));
                let last_day_start = Expression::Function(Box::new(Function::new("LAST_DAY".to_string(), vec![cast_start.clone()])));
                let day_last_end = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_end])));
                let day_last_start = Expression::Function(Box::new(Function::new("DAY".to_string(), vec![last_day_start])));
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(day_end.clone(), day_last_end)));
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(day_start.clone(), day_last_start)));
                let both_cond = Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                let day_diff = Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                // Parenthesize the subtraction so division binds correctly.
                let day_diff_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — whole-month difference only.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(), vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — unit is a string literal here.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(), vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            _ => {
                // Other targets: rebuild the MonthsBetween node unchanged.
                Ok(Expression::MonthsBetween(Box::new(crate::expressions::BinaryFunc {
                    this: end_date, expression: start_date, original_name: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
13612
13613 Action::AddMonthsConvert => {
13614 if let Expression::AddMonths(am) = e {
13615 let date = am.this;
13616 let val = am.expression;
13617 match target {
13618 DialectType::TSQL | DialectType::Fabric => {
13619 let cast_date = Self::ensure_cast_datetime2(date);
13620 Ok(Expression::Function(Box::new(Function::new(
13621 "DATEADD".to_string(), vec![
13622 Expression::Identifier(Identifier::new("MONTH")),
13623 val, cast_date,
13624 ],
13625 ))))
13626 }
13627 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
13628 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
13629 // Optionally wrapped in CAST(... AS type) if the input had a specific type
13630
13631 // Determine the cast type from the date expression
13632 let (cast_date, return_type) = match &date {
13633 Expression::Literal(Literal::String(_)) => {
13634 // String literal: CAST(str AS TIMESTAMP), no outer CAST
13635 (Expression::Cast(Box::new(Cast {
13636 this: date.clone(), to: DataType::Timestamp { precision: None, timezone: false },
13637 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13638 })), None)
13639 }
13640 Expression::Cast(c) => {
13641 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
13642 (date.clone(), Some(c.to.clone()))
13643 }
13644 _ => {
13645 // Expression or NULL::TYPE - keep as-is, check for cast type
13646 if let Expression::Cast(c) = &date {
13647 (date.clone(), Some(c.to.clone()))
13648 } else {
13649 (date.clone(), None)
13650 }
13651 }
13652 };
13653
13654 // Build the interval expression
13655 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
13656 // For integer values, use INTERVAL val MONTH
13657 let is_non_integer_val = match &val {
13658 Expression::Literal(Literal::Number(n)) => n.contains('.'),
13659 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
13660 Expression::Neg(n) => {
13661 if let Expression::Literal(Literal::Number(s)) = &n.this {
13662 s.contains('.')
13663 } else { false }
13664 }
13665 _ => false,
13666 };
13667
13668 let add_interval = if is_non_integer_val {
13669 // TO_MONTHS(CAST(ROUND(val) AS INT))
13670 let round_val = Expression::Function(Box::new(Function::new(
13671 "ROUND".to_string(), vec![val.clone()],
13672 )));
13673 let cast_int = Expression::Cast(Box::new(Cast {
13674 this: round_val, to: DataType::Int { length: None, integer_spelling: false },
13675 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13676 }));
13677 Expression::Function(Box::new(Function::new(
13678 "TO_MONTHS".to_string(), vec![cast_int],
13679 )))
13680 } else {
13681 // INTERVAL val MONTH
13682 // For negative numbers, wrap in parens
13683 let interval_val = match &val {
13684 Expression::Literal(Literal::Number(n)) if n.starts_with('-') => {
13685 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13686 }
13687 Expression::Neg(_) => {
13688 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13689 }
13690 Expression::Null(_) => {
13691 Expression::Paren(Box::new(Paren { this: val.clone(), trailing_comments: Vec::new() }))
13692 }
13693 _ => val.clone(),
13694 };
13695 Expression::Interval(Box::new(crate::expressions::Interval {
13696 this: Some(interval_val),
13697 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13698 unit: crate::expressions::IntervalUnit::Month,
13699 use_plural: false,
13700 }),
13701 }))
13702 };
13703
13704 // Build: date + interval
13705 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
13706 cast_date.clone(), add_interval.clone(),
13707 )));
13708
13709 // Build LAST_DAY(date)
13710 let last_day_date = Expression::Function(Box::new(Function::new(
13711 "LAST_DAY".to_string(), vec![cast_date.clone()],
13712 )));
13713
13714 // Build LAST_DAY(date + interval)
13715 let last_day_date_plus = Expression::Function(Box::new(Function::new(
13716 "LAST_DAY".to_string(), vec![date_plus_interval.clone()],
13717 )));
13718
13719 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
13720 let case_expr = Expression::Case(Box::new(Case {
13721 operand: None,
13722 whens: vec![(
13723 Expression::Eq(Box::new(BinaryOp::new(
13724 last_day_date, cast_date.clone(),
13725 ))),
13726 last_day_date_plus,
13727 )],
13728 else_: Some(date_plus_interval),
13729 }));
13730
13731 // Wrap in CAST(... AS type) if needed
13732 if let Some(dt) = return_type {
13733 Ok(Expression::Cast(Box::new(Cast {
13734 this: case_expr, to: dt,
13735 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13736 })))
13737 } else {
13738 Ok(case_expr)
13739 }
13740 }
13741 DialectType::DuckDB => {
13742 // Non-Snowflake source: simple date + INTERVAL
13743 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13744 Expression::Cast(Box::new(Cast {
13745 this: date, to: DataType::Timestamp { precision: None, timezone: false },
13746 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13747 }))
13748 } else { date };
13749 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13750 this: Some(val),
13751 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13752 unit: crate::expressions::IntervalUnit::Month,
13753 use_plural: false,
13754 }),
13755 }));
13756 Ok(Expression::Add(Box::new(BinaryOp::new(cast_date, interval))))
13757 }
13758 DialectType::Snowflake => {
13759 // Keep ADD_MONTHS when source is also Snowflake
13760 if matches!(source, DialectType::Snowflake) {
13761 Ok(Expression::Function(Box::new(Function::new(
13762 "ADD_MONTHS".to_string(), vec![date, val],
13763 ))))
13764 } else {
13765 Ok(Expression::Function(Box::new(Function::new(
13766 "DATEADD".to_string(), vec![
13767 Expression::Identifier(Identifier::new("MONTH")),
13768 val, date,
13769 ],
13770 ))))
13771 }
13772 }
13773 DialectType::Redshift => {
13774 Ok(Expression::Function(Box::new(Function::new(
13775 "DATEADD".to_string(), vec![
13776 Expression::Identifier(Identifier::new("MONTH")),
13777 val, date,
13778 ],
13779 ))))
13780 }
13781 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13782 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13783 Expression::Cast(Box::new(Cast {
13784 this: date, to: DataType::Timestamp { precision: None, timezone: false },
13785 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13786 }))
13787 } else { date };
13788 Ok(Expression::Function(Box::new(Function::new(
13789 "DATE_ADD".to_string(), vec![
13790 Expression::string("MONTH"),
13791 val, cast_date,
13792 ],
13793 ))))
13794 }
13795 DialectType::BigQuery => {
13796 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13797 this: Some(val),
13798 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13799 unit: crate::expressions::IntervalUnit::Month,
13800 use_plural: false,
13801 }),
13802 }));
13803 let cast_date = if matches!(&date, Expression::Literal(Literal::String(_))) {
13804 Expression::Cast(Box::new(Cast {
13805 this: date, to: DataType::Custom { name: "DATETIME".to_string() },
13806 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
13807 }))
13808 } else { date };
13809 Ok(Expression::Function(Box::new(Function::new(
13810 "DATE_ADD".to_string(), vec![cast_date, interval],
13811 ))))
13812 }
13813 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
13814 Ok(Expression::Function(Box::new(Function::new(
13815 "ADD_MONTHS".to_string(), vec![date, val],
13816 ))))
13817 }
13818 _ => {
13819 // Default: keep as AddMonths expression
13820 Ok(Expression::AddMonths(Box::new(crate::expressions::BinaryFunc {
13821 this: date, expression: val, original_name: None,
13822 })))
13823 }
13824 }
13825 } else {
13826 Ok(e)
13827 }
13828 }
13829
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column.
        // NOTE(review): the PERCENTILE_DISC flag is computed but never used —
        // discrete percentiles are converted identically to continuous ones.
        // Confirm whether that is intended.
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                // Missing percentile argument defaults to the median (0.5).
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(Literal::Number("0.5".to_string())));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => {
                (pc.percentile.clone(), false)
            }
            // Unknown inner node: leave the WITHIN GROUP expression untouched.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // Column comes from the first ORDER BY item; falls back to literal 1.
        let col = wg.order_by.first().map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => "APPROX_PERCENTILE",
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(), vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
13866
13867 Action::CurrentUserSparkParens => {
13868 // CURRENT_USER -> CURRENT_USER() for Spark
13869 if let Expression::CurrentUser(_) = e {
13870 Ok(Expression::Function(Box::new(Function::new("CURRENT_USER".to_string(), vec![]))))
13871 } else {
13872 Ok(e)
13873 }
13874 }
13875
13876 Action::SparkDateFuncCast => {
13877 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
13878 let cast_arg = |arg: Expression| -> Expression {
13879 match target {
13880 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13881 Self::double_cast_timestamp_date(arg)
13882 }
13883 _ => {
13884 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
13885 Self::ensure_cast_date(arg)
13886 }
13887 }
13888 };
13889 match e {
13890 Expression::Month(f) => {
13891 Ok(Expression::Month(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
13892 }
13893 Expression::Year(f) => {
13894 Ok(Expression::Year(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
13895 }
13896 Expression::Day(f) => {
13897 Ok(Expression::Day(Box::new(crate::expressions::UnaryFunc::new(cast_arg(f.this)))))
13898 }
13899 other => Ok(other),
13900 }
13901 }
13902
13903 Action::MapFromArraysConvert => {
13904 // Expression::MapFromArrays -> target-specific
13905 if let Expression::MapFromArrays(mfa) = e {
13906 let keys = mfa.this;
13907 let values = mfa.expression;
13908 match target {
13909 DialectType::Snowflake => {
13910 Ok(Expression::Function(Box::new(Function::new(
13911 "OBJECT_CONSTRUCT".to_string(),
13912 vec![keys, values],
13913 ))))
13914 }
13915 _ => {
13916 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
13917 Ok(Expression::Function(Box::new(Function::new(
13918 "MAP".to_string(),
13919 vec![keys, values],
13920 ))))
13921 }
13922 }
13923 } else {
13924 Ok(e)
13925 }
13926 }
13927
13928 Action::AnyToExists => {
13929 if let Expression::Any(q) = e {
13930 if let Some(op) = q.op.clone() {
13931 let lambda_param = crate::expressions::Identifier::new("x");
13932 let rhs = Expression::Identifier(lambda_param.clone());
13933 let body = match op {
13934 crate::expressions::QuantifiedOp::Eq => Expression::Eq(Box::new(BinaryOp::new(q.this, rhs))),
13935 crate::expressions::QuantifiedOp::Neq => Expression::Neq(Box::new(BinaryOp::new(q.this, rhs))),
13936 crate::expressions::QuantifiedOp::Lt => Expression::Lt(Box::new(BinaryOp::new(q.this, rhs))),
13937 crate::expressions::QuantifiedOp::Lte => Expression::Lte(Box::new(BinaryOp::new(q.this, rhs))),
13938 crate::expressions::QuantifiedOp::Gt => Expression::Gt(Box::new(BinaryOp::new(q.this, rhs))),
13939 crate::expressions::QuantifiedOp::Gte => Expression::Gte(Box::new(BinaryOp::new(q.this, rhs))),
13940 };
13941 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
13942 parameters: vec![lambda_param],
13943 body,
13944 colon: false,
13945 parameter_types: Vec::new(),
13946 }));
13947 Ok(Expression::Function(Box::new(Function::new(
13948 "EXISTS".to_string(),
13949 vec![q.subquery, lambda],
13950 ))))
13951 } else {
13952 Ok(Expression::Any(q))
13953 }
13954 } else {
13955 Ok(e)
13956 }
13957 }
13958
Action::GenerateSeriesConvert => {
    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
            let start = f.args[0].clone();
            let end = f.args[1].clone();
            // Step is optional (third argument).
            let step = f.args.get(2).cloned();

            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
            let step = step.map(|s| Self::normalize_interval_string(s, target));

            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena
                    | DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
                    match &arg {
                        Expression::CurrentTimestamp(_) => {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp { precision: None, timezone: false },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        _ => arg,
                    }
                } else {
                    arg
                }
            };

            let start = maybe_cast_timestamp(start);
            let end = maybe_cast_timestamp(end);

            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
            // (only the step normalization above applies).
            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                return Ok(Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(), gs_args,
                ))));
            }

            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
            if matches!(target, DialectType::DuckDB) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                let gs = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(), gs_args,
                )));
                return Ok(Expression::Function(Box::new(Function::new(
                    "UNNEST".to_string(), vec![gs],
                ))));
            }

            // Remaining targets are SEQUENCE-based.
            let mut seq_args = vec![start, end];
            if let Some(step) = step {
                seq_args.push(step);
            }

            let seq = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(), seq_args,
            )));

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Wrap in UNNEST
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNNEST".to_string(), vec![seq],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                    // Wrap in EXPLODE
                    Ok(Expression::Function(Box::new(Function::new(
                        "EXPLODE".to_string(), vec![seq],
                    ))))
                }
                _ => {
                    // Just SEQUENCE for others
                    Ok(seq)
                }
            }
        } else {
            // Not GENERATE_SERIES, or too few args: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
14056
14057 Action::ConcatCoalesceWrap => {
14058 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
14059 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
14060 if let Expression::Function(f) = e {
14061 if f.name.eq_ignore_ascii_case("CONCAT") {
14062 let new_args: Vec<Expression> = f.args.into_iter().map(|arg| {
14063 let cast_arg = if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
14064 Expression::Cast(Box::new(Cast {
14065 this: arg, to: DataType::VarChar { length: None, parenthesized_length: false },
14066 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14067 }))
14068 } else {
14069 arg
14070 };
14071 Expression::Function(Box::new(Function::new(
14072 "COALESCE".to_string(), vec![cast_arg, Expression::string("")],
14073 )))
14074 }).collect();
14075 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), new_args))))
14076 } else {
14077 Ok(Expression::Function(f))
14078 }
14079 } else {
14080 Ok(e)
14081 }
14082 }
14083
14084 Action::PipeConcatToConcat => {
14085 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
14086 if let Expression::Concat(op) = e {
14087 let cast_left = Expression::Cast(Box::new(Cast {
14088 this: op.left, to: DataType::VarChar { length: None, parenthesized_length: false },
14089 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14090 }));
14091 let cast_right = Expression::Cast(Box::new(Cast {
14092 this: op.right, to: DataType::VarChar { length: None, parenthesized_length: false },
14093 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14094 }));
14095 Ok(Expression::Function(Box::new(Function::new(
14096 "CONCAT".to_string(), vec![cast_left, cast_right],
14097 ))))
14098 } else {
14099 Ok(e)
14100 }
14101 }
14102
14103 Action::DivFuncConvert => {
14104 // DIV(a, b) -> target-specific integer division
14105 if let Expression::Function(f) = e {
14106 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
14107 let a = f.args[0].clone();
14108 let b = f.args[1].clone();
14109 match target {
14110 DialectType::DuckDB => {
14111 // DIV(a, b) -> CAST(a // b AS DECIMAL)
14112 let int_div = Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
14113 this: a, expression: b, original_name: None,
14114 }));
14115 Ok(Expression::Cast(Box::new(Cast {
14116 this: int_div, to: DataType::Decimal { precision: None, scale: None },
14117 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14118 })))
14119 }
14120 DialectType::BigQuery => {
14121 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
14122 let div_func = Expression::Function(Box::new(Function::new(
14123 "DIV".to_string(), vec![a, b],
14124 )));
14125 Ok(Expression::Cast(Box::new(Cast {
14126 this: div_func, to: DataType::Custom { name: "NUMERIC".to_string() },
14127 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14128 })))
14129 }
14130 DialectType::SQLite => {
14131 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
14132 let cast_a = Expression::Cast(Box::new(Cast {
14133 this: a, to: DataType::Custom { name: "REAL".to_string() },
14134 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14135 }));
14136 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
14137 let cast_int = Expression::Cast(Box::new(Cast {
14138 this: div, to: DataType::Int { length: None, integer_spelling: true },
14139 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14140 }));
14141 Ok(Expression::Cast(Box::new(Cast {
14142 this: cast_int, to: DataType::Custom { name: "REAL".to_string() },
14143 trailing_comments: Vec::new(), double_colon_syntax: false, format: None, default: None,
14144 })))
14145 }
14146 _ => Ok(Expression::Function(f)),
14147 }
14148 } else {
14149 Ok(Expression::Function(f))
14150 }
14151 } else {
14152 Ok(e)
14153 }
14154 }
14155
14156 Action::JsonObjectAggConvert => {
14157 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
14158 match e {
14159 Expression::Function(f) => {
14160 Ok(Expression::Function(Box::new(Function::new(
14161 "JSON_GROUP_OBJECT".to_string(), f.args,
14162 ))))
14163 }
14164 Expression::AggregateFunction(af) => {
14165 // AggregateFunction stores all args in the `args` vec
14166 Ok(Expression::Function(Box::new(Function::new(
14167 "JSON_GROUP_OBJECT".to_string(), af.args,
14168 ))))
14169 }
14170 other => Ok(other),
14171 }
14172 }
14173
14174 Action::JsonbExistsConvert => {
14175 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
14176 if let Expression::Function(f) = e {
14177 if f.args.len() == 2 {
14178 let json_expr = f.args[0].clone();
14179 let key = match &f.args[1] {
14180 Expression::Literal(crate::expressions::Literal::String(s)) => format!("$.{}", s),
14181 _ => return Ok(Expression::Function(f)),
14182 };
14183 Ok(Expression::Function(Box::new(Function::new(
14184 "JSON_EXISTS".to_string(), vec![json_expr, Expression::string(&key)],
14185 ))))
14186 } else {
14187 Ok(Expression::Function(f))
14188 }
14189 } else {
14190 Ok(e)
14191 }
14192 }
14193
14194 Action::DateBinConvert => {
14195 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
14196 if let Expression::Function(f) = e {
14197 Ok(Expression::Function(Box::new(Function::new(
14198 "TIME_BUCKET".to_string(), f.args,
14199 ))))
14200 } else {
14201 Ok(e)
14202 }
14203 }
14204
14205 Action::MysqlCastCharToText => {
14206 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
14207 if let Expression::Cast(mut c) = e {
14208 c.to = DataType::Text;
14209 Ok(Expression::Cast(c))
14210 } else {
14211 Ok(e)
14212 }
14213 }
14214
14215 Action::SparkCastVarcharToString => {
14216 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
14217 match e {
14218 Expression::Cast(mut c) => {
14219 c.to = Self::normalize_varchar_to_string(c.to);
14220 Ok(Expression::Cast(c))
14221 }
14222 Expression::TryCast(mut c) => {
14223 c.to = Self::normalize_varchar_to_string(c.to);
14224 Ok(Expression::TryCast(c))
14225 }
14226 _ => Ok(e),
14227 }
14228 }
14229
14230 Action::MinMaxToLeastGreatest => {
14231 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
14232 if let Expression::Function(f) = e {
14233 let name = f.name.to_uppercase();
14234 let new_name = match name.as_str() {
14235 "MIN" => "LEAST",
14236 "MAX" => "GREATEST",
14237 _ => return Ok(Expression::Function(f)),
14238 };
14239 Ok(Expression::Function(Box::new(Function::new(
14240 new_name.to_string(),
14241 f.args,
14242 ))))
14243 } else {
14244 Ok(e)
14245 }
14246 }
14247
14248 Action::ClickHouseUniqToApproxCountDistinct => {
14249 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
14250 if let Expression::Function(f) = e {
14251 Ok(Expression::Function(Box::new(Function::new(
14252 "APPROX_COUNT_DISTINCT".to_string(),
14253 f.args,
14254 ))))
14255 } else {
14256 Ok(e)
14257 }
14258 }
14259
14260 Action::ClickHouseAnyToAnyValue => {
14261 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
14262 if let Expression::Function(f) = e {
14263 Ok(Expression::Function(Box::new(Function::new(
14264 "ANY_VALUE".to_string(),
14265 f.args,
14266 ))))
14267 } else {
14268 Ok(e)
14269 }
14270 }
14271
14272 Action::OracleVarchar2ToVarchar => {
14273 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
14274 if let Expression::DataType(DataType::Custom { ref name }) = e {
14275 let upper = name.to_uppercase();
14276 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
14277 let inner = if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
14278 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
14279 let end = name.len() - 1; // skip trailing ")"
14280 Some(&name[start..end])
14281 } else {
14282 Option::None
14283 };
14284 if let Some(inner_str) = inner {
14285 // Parse the number part, ignoring BYTE/CHAR qualifier
14286 let num_str = inner_str.split_whitespace().next().unwrap_or("");
14287 if let Ok(n) = num_str.parse::<u32>() {
14288 Ok(Expression::DataType(DataType::VarChar { length: Some(n), parenthesized_length: false }))
14289 } else {
14290 Ok(e)
14291 }
14292 } else {
14293 // Plain VARCHAR2 / NVARCHAR2 without parens
14294 Ok(Expression::DataType(DataType::VarChar { length: Option::None, parenthesized_length: false }))
14295 }
14296 } else {
14297 Ok(e)
14298 }
14299 }
14300
14301 }
14302 })
14303 }
14304
14305 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
14306 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
14307 use crate::expressions::DataType;
14308 match dt {
14309 DataType::VarChar { .. } | DataType::Char { .. } => true,
14310 DataType::Struct { fields, .. } => fields.iter().any(|f| Self::has_varchar_char_type(&f.data_type)),
14311 _ => false,
14312 }
14313 }
14314
14315 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
14316 fn normalize_varchar_to_string(dt: crate::expressions::DataType) -> crate::expressions::DataType {
14317 use crate::expressions::DataType;
14318 match dt {
14319 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom { name: "STRING".to_string() },
14320 DataType::Struct { fields, nested } => {
14321 let fields = fields.into_iter().map(|mut f| {
14322 f.data_type = Self::normalize_varchar_to_string(f.data_type);
14323 f
14324 }).collect();
14325 DataType::Struct { fields, nested }
14326 }
14327 other => other,
14328 }
14329 }
14330
14331 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
14332 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
14333 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
14334 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
14335 let trimmed = s.trim();
14336
14337 // Find where digits end and unit text begins
14338 let digit_end = trimmed.find(|c: char| !c.is_ascii_digit()).unwrap_or(trimmed.len());
14339 if digit_end == 0 || digit_end == trimmed.len() {
14340 return expr;
14341 }
14342 let num = &trimmed[..digit_end];
14343 let unit_text = trimmed[digit_end..].trim().to_uppercase();
14344 if unit_text.is_empty() {
14345 return expr;
14346 }
14347
14348 let known_units = ["DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS"];
14349 if !known_units.contains(&unit_text.as_str()) {
14350 return expr;
14351 }
14352
14353 let unit_str = unit_text.clone();
14354 // Singularize
14355 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
14356 &unit_str[..unit_str.len()-1]
14357 } else {
14358 &unit_str
14359 };
14360 let unit = unit_singular;
14361
14362 match target {
14363 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
14364 // INTERVAL '2' DAY
14365 let iu = match unit {
14366 "DAY" => crate::expressions::IntervalUnit::Day,
14367 "HOUR" => crate::expressions::IntervalUnit::Hour,
14368 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14369 "SECOND" => crate::expressions::IntervalUnit::Second,
14370 "WEEK" => crate::expressions::IntervalUnit::Week,
14371 "MONTH" => crate::expressions::IntervalUnit::Month,
14372 "YEAR" => crate::expressions::IntervalUnit::Year,
14373 _ => return expr,
14374 };
14375 return Expression::Interval(Box::new(crate::expressions::Interval {
14376 this: Some(Expression::string(num)),
14377 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14378 unit: iu,
14379 use_plural: false,
14380 }),
14381 }));
14382 }
14383 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
14384 // INTERVAL '2 DAYS'
14385 let plural = if num != "1" && !unit_str.ends_with('S') {
14386 format!("{} {}S", num, unit)
14387 } else if unit_str.ends_with('S') {
14388 format!("{} {}", num, unit_str)
14389 } else {
14390 format!("{} {}", num, unit)
14391 };
14392 return Expression::Interval(Box::new(crate::expressions::Interval {
14393 this: Some(Expression::string(&plural)),
14394 unit: None,
14395 }));
14396 }
14397 _ => {
14398 // Spark/Databricks/Hive: INTERVAL '1' DAY
14399 let iu = match unit {
14400 "DAY" => crate::expressions::IntervalUnit::Day,
14401 "HOUR" => crate::expressions::IntervalUnit::Hour,
14402 "MINUTE" => crate::expressions::IntervalUnit::Minute,
14403 "SECOND" => crate::expressions::IntervalUnit::Second,
14404 "WEEK" => crate::expressions::IntervalUnit::Week,
14405 "MONTH" => crate::expressions::IntervalUnit::Month,
14406 "YEAR" => crate::expressions::IntervalUnit::Year,
14407 _ => return expr,
14408 };
14409 return Expression::Interval(Box::new(crate::expressions::Interval {
14410 this: Some(Expression::string(num)),
14411 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14412 unit: iu,
14413 use_plural: false,
14414 }),
14415 }));
14416 }
14417 }
14418 }
14419 // If it's already an INTERVAL expression, pass through
14420 expr
14421 }
14422
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// The rewrite joins every unnested array against a shared position series
    /// sized by GREATEST(len(arr1), len(arr2), ...), then projects each array's
    /// element only on its matching position and pads the rest with NULL.
    /// Returns `None` when the SELECT contains no UNNEST expressions, or when
    /// `target` is not one of BigQuery / Presto / Trino / Snowflake.
    fn rewrite_unnest_expansion(select: &crate::expressions::Select, target: DialectType) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind,
            Literal, UnnestFunc,
        };

        // Presto/Trino SEQUENCE positions start at 1; the others start at 0.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells the conditional function IFF instead of IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Dialect-specific name of the array-length function.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake qualify columns with table aliases (_u, _u_2, ...);
        // BigQuery/Snowflake pass an explicit NULL as the IF/IFF third argument.
        let use_table_aliases = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Snowflake);
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, table-qualified when a table alias is given.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN source.
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array argument of UNNEST(...)
            col_alias: String,               // generated column alias (col, col_2, ...)
            pos_alias: String,               // generated position alias (pos_2, pos_3, ...)
            source_alias: String,            // generated table alias (_u_2, _u_3, ...)
            original_expr: Expression,       // the full original SELECT item
            has_outer_alias: Option<String>, // explicit `AS name` on the item, if any
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        // Counters start so the first item yields "col" / "pos_2" / "_u_2",
        // leaving plain "pos" / "_u" for the shared position series.
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Find the array argument of an UNNEST nested anywhere inside the
        // expression (through aliases and +,-,*,/ arithmetic).
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() => {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op) | Expression::Sub(op) | Expression::Mul(op) | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level `expr AS name` alias on a SELECT item, if present.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                let col_alias = if col_counter == 1 { "col".to_string() } else { format!("col_{}", col_counter) };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // Nothing to expand — leave the SELECT untouched.
        if unnest_infos.is_empty() {
            return None;
        }

        // Aliases for the shared position series source.
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases { Some(series_source_alias.as_str()) } else { None };

        // Build new SELECT expressions: each UNNEST item becomes
        // IF(pos = pos_N, col_N [, NULL]) AS <name>.
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Prefer the user's explicit alias over the generated one.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            // Splice the IF(...) back into the original item, preserving any
            // surrounding arithmetic around the UNNEST.
            let if_expr = Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos.iter().map(|info| {
            Expression::Function(Box::new(Function::new(array_length_func.to_string(), vec![info.arr_expr.clone()])))
        }).collect();

        let greatest = Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based targets iterate 0..len-1, so subtract 1 from the upper bound.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(greatest, Expression::Literal(Literal::Number("1".to_string())))))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, max_len - 1))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![Expression::Literal(Literal::Number("0".to_string())), series_end],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, max_len))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![Expression::Literal(Literal::Number("1".to_string())), series_end],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence, expressions: Vec::new(), with_ordinality: false, alias: None, offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (max_len - 1) + 1)));
                // ARRAY_GENERATE_RANGE's upper bound is exclusive, hence the +1.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren { this: series_end, trailing_comments: Vec::new() })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![Expression::Literal(Literal::Number("0".to_string())), range_end],
                )));
                let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                    name: Identifier::new("INPUT".to_string()),
                    value: gen_range,
                    separator: crate::expressions::NamedArgSeparator::DArrow,
                }));
                let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake's FLATTEN exposes six fixed columns; the position
            // series alias "pos" replaces FLATTEN's "value" slot.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()), Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()), Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(series_unnest_expr, Identifier::new(series_alias.clone()))))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    let flatten_arg = Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: info.arr_expr.clone(),
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                    let flatten = Expression::Function(Box::new(Function::new("FLATTEN".to_string(), vec![flatten_arg])));
                    let table_fn = Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()), Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()), Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()), Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause: for each array, either its position matches the
        // series position, or the series has run past the (shorter) array and
        // the array is pinned to its last position:
        //   pos = pos_N OR (pos > size_N AND pos_N = size_N)
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases { Some(info.source_alias.as_str()) } else { None };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(), vec![info.arr_expr.clone()],
            )));

            // 0-based targets compare against (len - 1).
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(arr_size, Expression::Literal(Literal::Number("1".to_string()))))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), unnest_pos_col.clone())));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren { this: and_cond, trailing_comments: Vec::new() }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| Expression::Paren(Box::new(crate::expressions::Paren { this: e, trailing_comments: Vec::new() }));
            let mut iter = where_conditions.into_iter();
            // len() >= 2 here, so taking two items cannot fail.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        // The position series joins after any existing FROM sources, or
        // becomes the FROM clause itself when there were none.
        if new_select.from.is_some() {
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            new_select.from = Some(From { expressions: vec![series_alias_expr] });
            new_select.joins.extend(joins);
        }

        // AND the position predicates onto any pre-existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(existing_where.this.clone(), where_expr)));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
14781
14782 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
14783 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
14784 match original {
14785 Expression::Unnest(_) => replacement.clone(),
14786 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
14787 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
14788 Expression::Add(op) => {
14789 let left = Self::replace_unnest_with_if(&op.left, replacement);
14790 let right = Self::replace_unnest_with_if(&op.right, replacement);
14791 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
14792 }
14793 Expression::Sub(op) => {
14794 let left = Self::replace_unnest_with_if(&op.left, replacement);
14795 let right = Self::replace_unnest_with_if(&op.right, replacement);
14796 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
14797 }
14798 Expression::Mul(op) => {
14799 let left = Self::replace_unnest_with_if(&op.left, replacement);
14800 let right = Self::replace_unnest_with_if(&op.right, replacement);
14801 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
14802 }
14803 Expression::Div(op) => {
14804 let left = Self::replace_unnest_with_if(&op.left, replacement);
14805 let right = Self::replace_unnest_with_if(&op.right, replacement);
14806 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
14807 }
14808 _ => original.clone(),
14809 }
14810 }
14811
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Non-Insert/Select expressions, and Selects without an INTO clause,
    /// are returned unchanged.
    fn transform_select_into(expr: Expression, _source: DialectType, target: DialectType) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        // (TSQL and Fabric understand the # temp-table prefix and keep it).
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#') && !matches!(target, DialectType::TSQL | DialectType::Fabric) {
                let mut new_insert = insert.clone();
                new_insert.table.name.name = insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // Inserts never carry an INTO clause; nothing more to do.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Pull the raw target table name out of the INTO clause;
                // unexpected node shapes yield an empty name.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // The # prefix marks a TSQL temp table; an explicit TEMPORARY
                // keyword counts too.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // Minimal CREATE TABLE: everything defaults off except
                        // the name, temporary-ness, and the AS SELECT body.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false, is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // (only rewrite when TEMPORARY wasn't already set).
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    // Other targets keep SELECT INTO as-is.
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
14900
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place. Two passes are made:
    /// 1. Presto-style `WITH (key = value, ...)` pairs (`ct.with_properties`) are drained
    ///    and re-emitted in the target dialect's preferred form (kept for Presto-family,
    ///    converted to STORED AS / USING / TBLPROPERTIES for Hive/Spark, stripped for DuckDB).
    /// 2. Hive-style property expressions (`ct.properties`: STORED AS, TBLPROPERTIES,
    ///    PARTITIONED BY) are converted back into WITH pairs for Presto-family targets,
    ///    stripped for DuckDB, or format-unquoted for everything else.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression.
        // Used when rebuilding TBLPROPERTIES entries from textual WITH values.
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len()-1].to_string()))
            }
            // Check if it's a number (integer or float literal)
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                // NOTE: elements are treated as string literals; nested/escaped
                // commas inside elements are not handled here.
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to do when the table carries no properties of either style.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // --- Pass 1: Presto-style WITH properties -------------------------------
        if !ct.with_properties.is_empty() {
            // Extract FORMAT and PARTITIONED_BY (matched case-insensitively);
            // everything else is collected unchanged in `other_props`.
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list,
                            // stripping any identifier quoting before re-quoting as strings.
                            let cols: Vec<&str> = inner.split(',').map(|c| c.trim().trim_matches('"').trim_matches('\'')).collect();
                            let array_val = format!("ARRAY[{}]", cols.iter().map(|c| format!("'{}'", c)).collect::<Vec<_>>().join(", "));
                            ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                // hive_format = true selects STORED AS rendering downstream.
                                expressions: vec![],
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral { value: true }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become TBLPROPERTIES ('k' = v, ...) entries.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become TBLPROPERTIES ('k' = v, ...) entries.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            ))))
                            .collect();
                        ct.properties.push(Expression::Properties(Box::new(
                            Properties { expressions: eq_exprs },
                        )));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    // (FORMAT key is normalized to uppercase and re-quoted).
                    if let Some(fmt) = format_value {
                        ct.with_properties.push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties.push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // --- Pass 2: Hive-style property expressions ----------------------------
        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena);
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Drain the property expressions; only the ones we cannot
                // translate (and that DuckDB doesn't strip) are kept.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format expression: keep the
                                            // property untouched and move on.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties.push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression;
                                    // non-literal/identifier operands are skipped.
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => format!("'{}'", s),
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        // via the Generic dialect; generation failures are dropped.
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto declares partition columns alongside the others).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!("ARRAY[{}]",
                                        col_names.iter().map(|n| format!("'{}'", n)).collect::<Vec<_>>().join(", "));
                                    ct.with_properties.push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // Anything else survives for Presto targets;
                            // DuckDB drops all property expressions.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr = Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
15179
15180 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
15181 fn apply_partitioned_by(ct: &mut crate::expressions::CreateTable, partitioned_by_value: &str, target: DialectType) {
15182 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
15183
15184 // Parse the ARRAY['col1', 'col2'] value to extract column names
15185 let mut col_names: Vec<String> = Vec::new();
15186 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
15187 let inner = partitioned_by_value
15188 .trim()
15189 .trim_start_matches("ARRAY")
15190 .trim_start_matches('[')
15191 .trim_start_matches('(')
15192 .trim_end_matches(']')
15193 .trim_end_matches(')');
15194 for part in inner.split(',') {
15195 let col = part.trim().trim_matches('\'').trim_matches('"');
15196 if !col.is_empty() {
15197 col_names.push(col.to_string());
15198 }
15199 }
15200
15201 if col_names.is_empty() {
15202 return;
15203 }
15204
15205 if matches!(target, DialectType::Hive) {
15206 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
15207 let mut partition_col_defs = Vec::new();
15208 for col_name in &col_names {
15209 // Find and remove from columns
15210 if let Some(pos) = ct.columns.iter().position(|c| c.name.name.eq_ignore_ascii_case(col_name)) {
15211 let col_def = ct.columns.remove(pos);
15212 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
15213 }
15214 }
15215 if !partition_col_defs.is_empty() {
15216 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15217 PartitionedByProperty {
15218 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_col_defs }))),
15219 },
15220 )));
15221 }
15222 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
15223 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
15224 // Use quoted identifiers to match the quoting style of the original column definitions
15225 let partition_exprs: Vec<Expression> = col_names
15226 .iter()
15227 .map(|name| {
15228 // Check if the column exists in the column list and use its quoting
15229 let is_quoted = ct.columns.iter().any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
15230 let ident = if is_quoted { Identifier::quoted(name.clone()) } else { Identifier::new(name.clone()) };
15231 Expression::Column(Column {
15232 name: ident,
15233 table: None,
15234 join_mark: false,
15235 trailing_comments: Vec::new(),
15236 })
15237 })
15238 .collect();
15239 ct.properties.push(Expression::PartitionedByProperty(Box::new(
15240 PartitionedByProperty {
15241 this: Box::new(Expression::Tuple(Box::new(Tuple { expressions: partition_exprs }))),
15242 },
15243 )));
15244 }
15245 // DuckDB: strip partitioned_by entirely (already handled)
15246 }
15247
15248 /// Convert a DataType to Spark's type string format (using angle brackets)
15249 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
15250 use crate::expressions::DataType;
15251 match dt {
15252 DataType::Int { .. } => "INT".to_string(),
15253 DataType::BigInt { .. } => "BIGINT".to_string(),
15254 DataType::SmallInt { .. } => "SMALLINT".to_string(),
15255 DataType::TinyInt { .. } => "TINYINT".to_string(),
15256 DataType::Float { .. } => "FLOAT".to_string(),
15257 DataType::Double { .. } => "DOUBLE".to_string(),
15258 DataType::Decimal { precision: Some(p), scale: Some(s) } => format!("DECIMAL({}, {})", p, s),
15259 DataType::Decimal { precision: Some(p), .. } => format!("DECIMAL({})", p),
15260 DataType::Decimal { .. } => "DECIMAL".to_string(),
15261 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "STRING".to_string(),
15262 DataType::Char { .. } => "STRING".to_string(),
15263 DataType::Boolean => "BOOLEAN".to_string(),
15264 DataType::Date => "DATE".to_string(),
15265 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
15266 DataType::Json | DataType::JsonB => "STRING".to_string(),
15267 DataType::Binary { .. } => "BINARY".to_string(),
15268 DataType::Array { element_type, .. } => format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type)),
15269 DataType::Map { key_type, value_type } => format!("MAP<{}, {}>", Self::data_type_to_spark_string(key_type), Self::data_type_to_spark_string(value_type)),
15270 DataType::Struct { fields, .. } => {
15271 let field_strs: Vec<String> = fields.iter().map(|f| {
15272 if f.name.is_empty() {
15273 Self::data_type_to_spark_string(&f.data_type)
15274 } else {
15275 format!("{}: {}", f.name, Self::data_type_to_spark_string(&f.data_type))
15276 }
15277 }).collect();
15278 format!("STRUCT<{}>", field_strs.join(", "))
15279 }
15280 DataType::Custom { name } => name.clone(),
15281 _ => format!("{:?}", dt),
15282 }
15283 }
15284
15285 /// Extract value and unit from an Interval expression
15286 /// Returns (value_expression, IntervalUnit)
15287 fn extract_interval_parts(interval_expr: &Expression) -> (Expression, crate::expressions::IntervalUnit) {
15288 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
15289
15290 if let Expression::Interval(iv) = interval_expr {
15291 let val = iv.this.clone().unwrap_or(Expression::number(0));
15292 let unit = match &iv.unit {
15293 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
15294 None => {
15295 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
15296 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
15297 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
15298 if parts.len() == 2 {
15299 let unit_str = parts[1].trim().to_uppercase();
15300 let parsed_unit = match unit_str.as_str() {
15301 "YEAR" | "YEARS" => IntervalUnit::Year,
15302 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
15303 "MONTH" | "MONTHS" => IntervalUnit::Month,
15304 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
15305 "DAY" | "DAYS" => IntervalUnit::Day,
15306 "HOUR" | "HOURS" => IntervalUnit::Hour,
15307 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
15308 "SECOND" | "SECONDS" => IntervalUnit::Second,
15309 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
15310 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
15311 _ => IntervalUnit::Day,
15312 };
15313 // Return just the numeric part as value and parsed unit
15314 return (Expression::Literal(crate::expressions::Literal::String(parts[0].to_string())), parsed_unit);
15315 }
15316 IntervalUnit::Day
15317 } else {
15318 IntervalUnit::Day
15319 }
15320 }
15321 _ => IntervalUnit::Day,
15322 };
15323 (val, unit)
15324 } else {
15325 // Not an interval - pass through
15326 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
15327 }
15328 }
15329
15330 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
15331 fn normalize_bigquery_function(e: Expression, source: DialectType, target: DialectType) -> Result<Expression> {
15332 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
15333
15334 let f = if let Expression::Function(f) = e { *f } else { return Ok(e); };
15335 let name = f.name.to_uppercase();
15336 let mut args = f.args;
15337
15338 /// Helper to extract unit string from an identifier, column, or literal expression
15339 fn get_unit_str(expr: &Expression) -> String {
15340 match expr {
15341 Expression::Identifier(id) => id.name.to_uppercase(),
15342 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
15343 Expression::Column(col) => col.name.name.to_uppercase(),
15344 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
15345 Expression::Function(f) => {
15346 let base = f.name.to_uppercase();
15347 if !f.args.is_empty() {
15348 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
15349 let inner = get_unit_str(&f.args[0]);
15350 format!("{}({})", base, inner)
15351 } else {
15352 base
15353 }
15354 }
15355 _ => "DAY".to_string(),
15356 }
15357 }
15358
15359 /// Parse unit string to IntervalUnit
15360 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
15361 match s {
15362 "YEAR" => crate::expressions::IntervalUnit::Year,
15363 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
15364 "MONTH" => crate::expressions::IntervalUnit::Month,
15365 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
15366 "DAY" => crate::expressions::IntervalUnit::Day,
15367 "HOUR" => crate::expressions::IntervalUnit::Hour,
15368 "MINUTE" => crate::expressions::IntervalUnit::Minute,
15369 "SECOND" => crate::expressions::IntervalUnit::Second,
15370 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
15371 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
15372 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
15373 _ => crate::expressions::IntervalUnit::Day,
15374 }
15375 }
15376
15377 match name.as_str() {
15378 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
15379 // (BigQuery: result = date1 - date2, Standard: result = end - start)
15380 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
15381 let date1 = args.remove(0);
15382 let date2 = args.remove(0);
15383 let unit_expr = args.remove(0);
15384 let unit_str = get_unit_str(&unit_expr);
15385
15386 if matches!(target, DialectType::BigQuery) {
15387 // BigQuery -> BigQuery: just uppercase the unit
15388 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
15389 return Ok(Expression::Function(Box::new(Function::new(
15390 f.name, vec![date1, date2, unit],
15391 ))));
15392 }
15393
15394 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
15395 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
15396 if matches!(target, DialectType::Snowflake) {
15397 return Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
15398 this: Box::new(date2),
15399 expression: Box::new(date1),
15400 unit: Some(unit_str),
15401 })));
15402 }
15403
15404 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
15405 if matches!(target, DialectType::DuckDB) {
15406 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
15407 // CAST to TIME
15408 let cast_fn = |e: Expression| -> Expression {
15409 match e {
15410 Expression::Literal(Literal::String(s)) => {
15411 Expression::Cast(Box::new(Cast {
15412 this: Expression::Literal(Literal::String(s)),
15413 to: DataType::Custom { name: "TIME".to_string() },
15414 trailing_comments: vec![],
15415 double_colon_syntax: false,
15416 format: None,
15417 default: None,
15418 }))
15419 }
15420 other => other,
15421 }
15422 };
15423 (cast_fn(date1), cast_fn(date2))
15424 } else if name == "DATETIME_DIFF" {
15425 // CAST to TIMESTAMP
15426 (Self::ensure_cast_timestamp(date1), Self::ensure_cast_timestamp(date2))
15427 } else {
15428 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
15429 (Self::ensure_cast_timestamptz(date1), Self::ensure_cast_timestamptz(date2))
15430 };
15431 return Ok(Expression::Function(Box::new(Function::new(
15432 "DATE_DIFF".to_string(), vec![
15433 Expression::Literal(Literal::String(unit_str)),
15434 cast_d2,
15435 cast_d1,
15436 ],
15437 ))));
15438 }
15439
15440 // Convert to standard TIMESTAMPDIFF(unit, start, end)
15441 let unit = Expression::Identifier(Identifier::new(unit_str));
15442 Ok(Expression::Function(Box::new(Function::new(
15443 "TIMESTAMPDIFF".to_string(), vec![unit, date2, date1],
15444 ))))
15445 }
15446
15447 // DATEDIFF(unit, start, end) -> target-specific form
15448 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
15449 "DATEDIFF" if args.len() == 3 => {
15450 let arg0 = args.remove(0);
15451 let arg1 = args.remove(0);
15452 let arg2 = args.remove(0);
15453 let unit_str = get_unit_str(&arg0);
15454
15455 // Redshift DATEDIFF(unit, start, end) order: result = end - start
15456 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
15457 // TSQL DATEDIFF(unit, start, end) order: result = end - start
15458
15459 if matches!(target, DialectType::Snowflake) {
15460 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
15461 let unit = Expression::Identifier(Identifier::new(unit_str));
15462 return Ok(Expression::Function(Box::new(Function::new(
15463 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15464 ))));
15465 }
15466
15467 if matches!(target, DialectType::DuckDB) {
15468 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
15469 let cast_d1 = Self::ensure_cast_timestamp(arg1);
15470 let cast_d2 = Self::ensure_cast_timestamp(arg2);
15471 return Ok(Expression::Function(Box::new(Function::new(
15472 "DATE_DIFF".to_string(), vec![
15473 Expression::Literal(Literal::String(unit_str)),
15474 cast_d1,
15475 cast_d2,
15476 ],
15477 ))));
15478 }
15479
15480 if matches!(target, DialectType::BigQuery) {
15481 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
15482 let cast_d1 = Self::ensure_cast_datetime(arg1);
15483 let cast_d2 = Self::ensure_cast_datetime(arg2);
15484 let unit = Expression::Identifier(Identifier::new(unit_str));
15485 return Ok(Expression::Function(Box::new(Function::new(
15486 "DATE_DIFF".to_string(), vec![cast_d2, cast_d1, unit],
15487 ))));
15488 }
15489
15490 if matches!(target, DialectType::Spark | DialectType::Databricks) {
15491 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
15492 let unit = Expression::Identifier(Identifier::new(unit_str));
15493 return Ok(Expression::Function(Box::new(Function::new(
15494 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15495 ))));
15496 }
15497
15498 if matches!(target, DialectType::Hive) {
15499 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
15500 match unit_str.as_str() {
15501 "MONTH" => {
15502 return Ok(Expression::Function(Box::new(Function::new(
15503 "CAST".to_string(), vec![
15504 Expression::Function(Box::new(Function::new(
15505 "MONTHS_BETWEEN".to_string(), vec![arg2, arg1],
15506 ))),
15507 ],
15508 ))));
15509 }
15510 "WEEK" => {
15511 return Ok(Expression::Cast(Box::new(Cast {
15512 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
15513 Expression::Function(Box::new(Function::new(
15514 "DATEDIFF".to_string(), vec![arg2, arg1],
15515 ))),
15516 Expression::Literal(Literal::Number("7".to_string())),
15517 ))),
15518 to: DataType::Int { length: None, integer_spelling: false },
15519 trailing_comments: vec![],
15520 double_colon_syntax: false,
15521 format: None,
15522 default: None,
15523 })));
15524 }
15525 _ => {
15526 // Default: DATEDIFF(end, start) for DAY
15527 return Ok(Expression::Function(Box::new(Function::new(
15528 "DATEDIFF".to_string(), vec![arg2, arg1],
15529 ))));
15530 }
15531 }
15532 }
15533
15534 if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
15535 // Presto/Trino: DATE_DIFF('UNIT', start, end)
15536 return Ok(Expression::Function(Box::new(Function::new(
15537 "DATE_DIFF".to_string(), vec![
15538 Expression::Literal(Literal::String(unit_str)),
15539 arg1,
15540 arg2,
15541 ],
15542 ))));
15543 }
15544
15545 if matches!(target, DialectType::TSQL) {
15546 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
15547 let cast_d2 = Self::ensure_cast_datetime2(arg2);
15548 let unit = Expression::Identifier(Identifier::new(unit_str));
15549 return Ok(Expression::Function(Box::new(Function::new(
15550 "DATEDIFF".to_string(), vec![unit, arg1, cast_d2],
15551 ))));
15552 }
15553
15554 if matches!(target, DialectType::PostgreSQL) {
15555 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
15556 // For now, use DATEDIFF (passthrough) with uppercased unit
15557 let unit = Expression::Identifier(Identifier::new(unit_str));
15558 return Ok(Expression::Function(Box::new(Function::new(
15559 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15560 ))));
15561 }
15562
15563 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
15564 let unit = Expression::Identifier(Identifier::new(unit_str));
15565 Ok(Expression::Function(Box::new(Function::new(
15566 "DATEDIFF".to_string(), vec![unit, arg1, arg2],
15567 ))))
15568 }
15569
15570 // DATE_DIFF(date1, date2, unit) -> standard form
15571 "DATE_DIFF" if args.len() == 3 => {
15572 let date1 = args.remove(0);
15573 let date2 = args.remove(0);
15574 let unit_expr = args.remove(0);
15575 let unit_str = get_unit_str(&unit_expr);
15576
15577 if matches!(target, DialectType::BigQuery) {
15578 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
15579 let norm_unit = if unit_str == "WEEK(SUNDAY)" { "WEEK".to_string() } else { unit_str };
15580 let norm_d1 = Self::date_literal_to_cast(date1);
15581 let norm_d2 = Self::date_literal_to_cast(date2);
15582 let unit = Expression::Identifier(Identifier::new(norm_unit));
15583 return Ok(Expression::Function(Box::new(Function::new(
15584 f.name, vec![norm_d1, norm_d2, unit],
15585 ))));
15586 }
15587
15588 if matches!(target, DialectType::MySQL) {
15589 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
15590 let norm_d1 = Self::date_literal_to_cast(date1);
15591 let norm_d2 = Self::date_literal_to_cast(date2);
15592 return Ok(Expression::Function(Box::new(Function::new(
15593 "DATEDIFF".to_string(), vec![norm_d1, norm_d2],
15594 ))));
15595 }
15596
15597 if matches!(target, DialectType::StarRocks) {
15598 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
15599 let norm_d1 = Self::date_literal_to_cast(date1);
15600 let norm_d2 = Self::date_literal_to_cast(date2);
15601 return Ok(Expression::Function(Box::new(Function::new(
15602 "DATE_DIFF".to_string(), vec![
15603 Expression::Literal(Literal::String(unit_str)),
15604 norm_d1,
15605 norm_d2,
15606 ],
15607 ))));
15608 }
15609
15610 if matches!(target, DialectType::DuckDB) {
15611 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
15612 let norm_d1 = Self::ensure_cast_date(date1);
15613 let norm_d2 = Self::ensure_cast_date(date2);
15614
15615 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
15616 let is_week_variant = unit_str == "WEEK" || unit_str.starts_with("WEEK(") || unit_str == "ISOWEEK";
15617 if is_week_variant {
15618 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
15619 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
15620 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
15621 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
15622 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
15623 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
15624 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
15625 Some("1") // Shift Sunday to Monday alignment
15626 } else if unit_str == "WEEK(SATURDAY)" {
15627 Some("-5")
15628 } else if unit_str == "WEEK(TUESDAY)" {
15629 Some("-1")
15630 } else if unit_str == "WEEK(WEDNESDAY)" {
15631 Some("-2")
15632 } else if unit_str == "WEEK(THURSDAY)" {
15633 Some("-3")
15634 } else if unit_str == "WEEK(FRIDAY)" {
15635 Some("-4")
15636 } else {
15637 Some("1") // default to Sunday
15638 };
15639
15640 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
15641 let shifted = if let Some(off) = offset {
15642 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
15643 this: Some(Expression::Literal(Literal::String(off.to_string()))),
15644 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
15645 }));
15646 Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval)))
15647 } else {
15648 date
15649 };
15650 Expression::Function(Box::new(Function::new(
15651 "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String("WEEK".to_string())), shifted],
15652 )))
15653 };
15654
15655 let trunc_d2 = make_trunc(norm_d2, day_offset);
15656 let trunc_d1 = make_trunc(norm_d1, day_offset);
15657 return Ok(Expression::Function(Box::new(Function::new(
15658 "DATE_DIFF".to_string(), vec![
15659 Expression::Literal(Literal::String("WEEK".to_string())),
15660 trunc_d2,
15661 trunc_d1,
15662 ],
15663 ))));
15664 }
15665
15666 return Ok(Expression::Function(Box::new(Function::new(
15667 "DATE_DIFF".to_string(), vec![
15668 Expression::Literal(Literal::String(unit_str)),
15669 norm_d2,
15670 norm_d1,
15671 ],
15672 ))));
15673 }
15674
15675 // Default: DATEDIFF(unit, date2, date1)
15676 let unit = Expression::Identifier(Identifier::new(unit_str));
15677 Ok(Expression::Function(Box::new(Function::new(
15678 "DATEDIFF".to_string(), vec![unit, date2, date1],
15679 ))))
15680 }
15681
// TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
// Rewrites the BigQuery-style TIMESTAMP_ADD / DATETIME_ADD / TIME_ADD family
// (timestamp + INTERVAL literal) into the target dialect's native addition form.
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose `INTERVAL n UNIT` into its numeric value and unit parts.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                this: Box::new(val),
                expression: Box::new(cast_ts),
                unit: Some(unit_str),
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                }));
                Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(ts, interval))))
            } else if name == "DATETIME_ADD" && matches!(target, DialectType::Databricks) {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                let unit_str = Self::interval_unit_to_string(&unit);
                // Only the TIMESTAMP_*/DATETIME_* variants get the cast;
                // TIME_ADD passes its operand through unchanged.
                let cast_ts = if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, cast_ts],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: mysql_ts,
                interval: val,
                unit,
            })))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: cast_ts,
                interval: val,
                unit,
            })))
        }
    }
}
15778
// TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
// Mirror of the *_ADD arm above: rewrites the subtraction family into each
// target dialect's native form (negated add, `-` interval, or DateSub node).
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose `INTERVAL n UNIT` into its numeric value and unit parts.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Negate by multiplying with -1 so non-literal values also work.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
            )));
            Ok(Expression::TimestampAdd(Box::new(crate::expressions::TimestampAdd {
                this: Box::new(neg_val),
                expression: Box::new(cast_ts),
                unit: Some(unit_str),
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
                }));
                Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_ts, interval))))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                    val,
                    Expression::Neg(Box::new(crate::expressions::UnaryOp { this: Expression::number(1) })),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), neg_val, ts],
                ))))
            }
        }
        DialectType::MySQL => {
            // MySQL: DATE_SUB(TIMESTAMP(ts), INTERVAL val UNIT) via DateSub node.
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f) if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") => {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => Expression::Literal(Literal::String(s)),
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), vec![unwrapped])))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: mysql_ts,
                interval: val,
                unit,
            })))
        }
        _ => {
            // Default: DateSub expression; DuckDB additionally casts the operand.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: cast_ts,
                interval: val,
                unit,
            })))
        }
    }
}
15873
// DATE_SUB(date, INTERVAL n UNIT) -> target-specific
// BigQuery/MySQL-style two-argument DATE_SUB: lowered either to a DateSub AST
// node (letting the target's generator finish the job) or directly to
// `date - INTERVAL` arithmetic for dialects that prefer operator syntax.
"DATE_SUB" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose `INTERVAL n UNIT` into its numeric value and unit parts.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_ADD(date, -val)
            // Use DateAdd expression with negative val so it generates correctly
            // The generator will output DATE_ADD(date, INTERVAL -val DAY)
            // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
            // Instead, we directly output as a simple negated DateSub
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
            }));
            Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
        }
        DialectType::Snowflake => {
            // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
            // Just ensure the date is cast properly
            let cast_date = Self::ensure_cast_date(date);
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: cast_date,
                interval: val,
                unit,
            })))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date - INTERVAL 'val UNIT'
            // Value and unit are folded into a single quoted interval string.
            let unit_str = Self::interval_unit_to_string(&unit);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
                unit: None,
            }));
            Ok(Expression::Sub(Box::new(crate::expressions::BinaryOp::new(date, interval))))
        }
        _ => {
            // Default: keep a DateSub node and let the target generator render it.
            Ok(Expression::DateSub(Box::new(crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            })))
        }
    }
}
15930
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
// Three-argument DATEADD: a long dispatch that picks the closest native
// construct per target (function call, ADD_MONTHS, or `+ INTERVAL` arithmetic).
"DATEADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Normalized (uppercased) unit string extracted from the first argument.
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(source, DialectType::Spark | DialectType::Databricks | DialectType::Hive) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
        // Helper: multiply the value by a constant factor, folding the
        // multiplication when the value is an integer literal.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                if let Ok(val) = n.parse::<i64>() {
                    return Expression::Literal(crate::expressions::Literal::Number((val * factor).to_string()));
                }
            }
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr, Expression::Literal(crate::expressions::Literal::Number(factor.to_string())),
            )))
        }
        match unit_str.as_str() {
            "YEAR" => {
                // YEAR -> 12 months via ADD_MONTHS
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                // QUARTER -> 3 months via ADD_MONTHS
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                // WEEK -> 7 days via DATE_ADD
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(), vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(), vec![arg2, arg1],
                ))));
            }
            _ => {
                let iu = parse_interval_unit(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT'
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&arg1), unit_str)))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
    }

    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) — unit as a string literal.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![
                Expression::Literal(Literal::String(unit_str)),
                arg1,
                arg2,
            ],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date) — unit as a bare identifier.
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(), vec![unit, arg1, arg2],
    ))))
}
16093
// DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
// Re-targets the three-argument DATE_ADD spelling; mostly a matter of whether
// the unit is rendered as a string literal or a bare identifier, and of the
// function name (DATE_ADD vs DATEADD).
"DATE_ADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Normalized (uppercased) unit string extracted from the first argument.
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) — unit as a string literal.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![
                Expression::Literal(Literal::String(unit_str)),
                arg1,
                arg2,
            ],
        ))));
    }

    if matches!(target, DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift) {
        // DATEADD(UNIT, val, date)
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // TSQL additionally casts the date operand to DATETIME2.
        let date = if matches!(target, DialectType::TSQL) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL val UNIT
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
        }));
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(arg2, interval))));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![unit, arg1, arg2],
        ))));
    }

    // Default: DATE_ADD(UNIT, val, date)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(), vec![unit, arg1, arg2],
    ))))
}
16149
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
// Lowers the BigQuery/MySQL two-argument DATE_ADD into the target's native
// shape: interval arithmetic, 3-arg DATE_ADD/DATEADD, or ADD_MONTHS.
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose `INTERVAL val UNIT` into value and unit; unit_str is the
    // normalized textual form used by the string-literal-based targets.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit, use_plural: false }),
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(cast_date, interval))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT'
            // Value and unit are folded into a single quoted interval string.
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String(format!("{} {}", Self::expr_to_string(&val), unit_str)))),
                unit: None,
            }));
            Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY
            match unit_str.as_str() {
                "DAY" => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![date, val],
                    ))))
                }
                "MONTH" => {
                    // MONTH maps to the dedicated ADD_MONTHS builtin.
                    Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(), vec![date, val],
                    ))))
                }
                _ => {
                    // Other units: keep the 2-arg form with an interval argument.
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(), vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(UNIT, val, date) — no cast required.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, date,
                ],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(quoted_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(), vec![date, interval],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(UNIT, val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(), vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val, date,
                ],
            ))))
        }
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            })))
        }
    }
}
16290
// ADD_MONTHS(date, val) -> target-specific form
// Oracle/Hive/Spark-style month arithmetic: kept as-is for targets that have
// the builtin, otherwise rewritten to DATEADD / DATE_ADD / `+ INTERVAL`.
"ADD_MONTHS" if args.len() == 2 => {
    let date = args.remove(0);
    let val = args.remove(0);

    if matches!(target, DialectType::TSQL) {
        // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
        let cast_date = Self::ensure_cast_datetime2(date);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![
                Expression::Identifier(Identifier::new("MONTH")),
                val,
                cast_date,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL val MONTH
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(val),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: crate::expressions::IntervalUnit::Month,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Add(Box::new(crate::expressions::BinaryOp::new(date, interval))));
    }

    if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
        if matches!(source, DialectType::Snowflake) {
            return Ok(Expression::Function(Box::new(Function::new(
                "ADD_MONTHS".to_string(), vec![date, val],
            ))));
        }
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(), vec![
                Expression::Identifier(Identifier::new("MONTH")),
                val,
                date,
            ],
        ))));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark: ADD_MONTHS(date, val) - keep as is
        return Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(), vec![date, val],
        ))));
    }

    if matches!(target, DialectType::Hive) {
        // Hive also has a native ADD_MONTHS builtin.
        return Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(), vec![date, val],
        ))));
    }

    if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena) {
        // Presto: DATE_ADD('MONTH', val, date)
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(), vec![
                Expression::Literal(Literal::String("MONTH".to_string())),
                val,
                date,
            ],
        ))));
    }

    // Default: keep ADD_MONTHS
    Ok(Expression::Function(Box::new(Function::new(
        "ADD_MONTHS".to_string(), vec![date, val],
    ))))
}
16365
// SAFE_DIVIDE(x, y) -> target-specific form directly
// BigQuery's SAFE_DIVIDE returns NULL on division by zero; emulated with
// CASE WHEN / IFF / IF depending on the target's conditional syntax.
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions
    // (simple columns/literals/identifiers are used verbatim).
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => y.clone(),
        _ => Expression::Paren(Box::new(Paren { this: y.clone(), trailing_comments: vec![] })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => x.clone(),
        _ => Expression::Paren(Box::new(Paren { this: x.clone(), trailing_comments: vec![] })),
    };
    // Shared building blocks: `y <> 0` guard and the plain `x / y` quotient.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(y_ref.clone(), Expression::number(0))));
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(x_ref.clone(), y_ref.clone())));

    match target {
        DialectType::DuckDB | DialectType::PostgreSQL => {
            // CASE WHEN y <> 0 THEN x / y ELSE NULL END
            // PostgreSQL casts x to DOUBLE PRECISION to avoid integer division.
            let result_div = if matches!(target, DialectType::PostgreSQL) {
                let cast_x = Expression::Cast(Box::new(Cast {
                    this: x_ref,
                    to: DataType::Custom { name: "DOUBLE PRECISION".to_string() },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)))
            } else {
                div_expr
            };
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(condition, result_div)],
                else_: Some(Expression::Null(crate::expressions::Null)),
            })))
        }
        DialectType::Snowflake => {
            // IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: Some("IFF".to_string()),
            })))
        }
        DialectType::Presto | DialectType::Trino => {
            // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
            // Cast avoids Presto's integer division semantics.
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x_ref,
                to: DataType::Double { precision: None, scale: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let cast_div = Expression::Div(Box::new(crate::expressions::BinaryOp::new(cast_x, y_ref)));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
        _ => {
            // IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
    }
}
16442
16443 // GENERATE_UUID() -> UUID() with CAST to string
16444 "GENERATE_UUID" => {
16445 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
16446 this: None,
16447 name: None,
16448 is_string: None,
16449 }));
16450 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
16451 let cast_type = match target {
16452 DialectType::DuckDB => Some(DataType::Text),
16453 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar { length: None, parenthesized_length: false }),
16454 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Some(DataType::String { length: None }),
16455 _ => None,
16456 };
16457 if let Some(dt) = cast_type {
16458 Ok(Expression::Cast(Box::new(Cast {
16459 this: uuid_expr,
16460 to: dt,
16461 trailing_comments: vec![],
16462 double_colon_syntax: false,
16463 format: None,
16464 default: None,
16465 })))
16466 } else {
16467 Ok(uuid_expr)
16468 }
16469 }
16470
16471 // COUNTIF(x) -> CountIf expression
16472 "COUNTIF" if args.len() == 1 => {
16473 let arg = args.remove(0);
16474 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
16475 this: arg,
16476 distinct: false,
16477 filter: None,
16478 order_by: vec![],
16479 name: None,
16480 ignore_nulls: None,
16481 having_max: None,
16482 limit: None,
16483 })))
16484 }
16485
16486 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
16487 "EDIT_DISTANCE" => {
16488 // Strip named arguments (max_distance => N) and pass as positional
16489 let mut positional_args: Vec<Expression> = vec![];
16490 for arg in args {
16491 match arg {
16492 Expression::NamedArgument(na) => {
16493 positional_args.push(na.value);
16494 }
16495 other => positional_args.push(other),
16496 }
16497 }
16498 if positional_args.len() >= 2 {
16499 let col1 = positional_args.remove(0);
16500 let col2 = positional_args.remove(0);
16501 let levenshtein = crate::expressions::BinaryFunc {
16502 this: col1,
16503 expression: col2,
16504 original_name: None,
16505 };
16506 // Pass extra args through a function wrapper with all args
16507 if !positional_args.is_empty() {
16508 let mut all_args = vec![levenshtein.this, levenshtein.expression];
16509 all_args.extend(positional_args);
16510 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
16511 let func_name = if matches!(target, DialectType::PostgreSQL) {
16512 "LEVENSHTEIN_LESS_EQUAL"
16513 } else {
16514 "LEVENSHTEIN"
16515 };
16516 return Ok(Expression::Function(Box::new(Function::new(
16517 func_name.to_string(), all_args,
16518 ))));
16519 }
16520 Ok(Expression::Levenshtein(Box::new(levenshtein)))
16521 } else {
16522 Ok(Expression::Function(Box::new(Function::new("EDIT_DISTANCE".to_string(), positional_args))))
16523 }
16524 }
16525
16526 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
16527 "TIMESTAMP_SECONDS" if args.len() == 1 => {
16528 let arg = args.remove(0);
16529 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16530 this: Box::new(arg),
16531 scale: Some(0),
16532 zone: None,
16533 hours: None,
16534 minutes: None,
16535 format: None,
16536 target_type: None,
16537 })))
16538 }
16539
16540 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
16541 "TIMESTAMP_MILLIS" if args.len() == 1 => {
16542 let arg = args.remove(0);
16543 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16544 this: Box::new(arg),
16545 scale: Some(3),
16546 zone: None,
16547 hours: None,
16548 minutes: None,
16549 format: None,
16550 target_type: None,
16551 })))
16552 }
16553
16554 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
16555 "TIMESTAMP_MICROS" if args.len() == 1 => {
16556 let arg = args.remove(0);
16557 Ok(Expression::UnixToTime(Box::new(crate::expressions::UnixToTime {
16558 this: Box::new(arg),
16559 scale: Some(6),
16560 zone: None,
16561 hours: None,
16562 minutes: None,
16563 format: None,
16564 target_type: None,
16565 })))
16566 }
16567
16568 // DIV(x, y) -> IntDiv expression
16569 "DIV" if args.len() == 2 => {
16570 let x = args.remove(0);
16571 let y = args.remove(0);
16572 Ok(Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
16573 this: x,
16574 expression: y,
16575 original_name: None,
16576 })))
16577 }
16578
            // TO_HEX(x) -> target-specific form.
            //
            // Targets whose native HEX/TO_HEX output differs in case from BigQuery's
            // lowercase output get wrapped in LOWER(...) to normalize the result.
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // True when the operand is a call to a hash function whose result is
                // already a hex string on some targets.
                // NOTE(review): this compares names case-sensitively, while the
                // Snowflake branch below normalizes with to_uppercase() — confirm the
                // parser always upper-cases function names before this point.
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ -> BQ round-trip: keep as TO_HEX.
                    Ok(Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg]))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant.
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake rewrites hash-then-hex into the binary hash variant
                    // rendered as text via TO_CHAR:
                    //   TO_HEX(SHA1(x))   -> TO_CHAR(SHA1_BINARY(x))
                    //   TO_HEX(MD5(x))    -> TO_CHAR(MD5_BINARY(x))
                    //   TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    //   TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new("SHA1_BINARY".to_string(), inner_args))),
                            "MD5" => Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), inner_args))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest size as a trailing argument.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), a)))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![binary_func]))))
                    } else {
                        // Unreachable in practice — inner_returns_hex already proved
                        // `arg` is a Function — but keep a sane fallback for exhaustiveness.
                        let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                        Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino keep the TO_HEX name; LOWER(...) normalizes the
                    // output to BigQuery's lowercase convention.
                    let inner = Expression::Function(Box::new(Function::new("TO_HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
                } else {
                    // Generic fallback: LOWER(HEX(x)).
                    let inner = Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(inner))))
                }
            }
16625
            // LAST_DAY(date, unit) -> LAST_DAY(date): the explicit unit is dropped
            // because MONTH (BigQuery's default) is what a bare LAST_DAY means.
            // NOTE(review): a non-MONTH unit (e.g. YEAR, WEEK) is silently discarded
            // here, which would change semantics — confirm whether such units should
            // be rejected or handled specially.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(), vec![date],
                ))))
            }
16634
16635 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
16636 "GENERATE_ARRAY" => {
16637 let start = args.get(0).cloned();
16638 let end = args.get(1).cloned();
16639 let step = args.get(2).cloned();
16640 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16641 start: start.map(Box::new),
16642 end: end.map(Box::new),
16643 step: step.map(Box::new),
16644 is_end_exclusive: None,
16645 })))
16646 }
16647
16648 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
16649 "GENERATE_TIMESTAMP_ARRAY" => {
16650 let start = args.get(0).cloned();
16651 let end = args.get(1).cloned();
16652 let step = args.get(2).cloned();
16653
16654 if matches!(target, DialectType::DuckDB) {
16655 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
16656 // Only cast string literals - leave columns/expressions as-is
16657 let maybe_cast_ts = |expr: Expression| -> Expression {
16658 if matches!(&expr, Expression::Literal(Literal::String(_))) {
16659 Expression::Cast(Box::new(Cast {
16660 this: expr,
16661 to: DataType::Timestamp { precision: None, timezone: false },
16662 trailing_comments: vec![],
16663 double_colon_syntax: false,
16664 format: None,
16665 default: None,
16666 }))
16667 } else {
16668 expr
16669 }
16670 };
16671 let cast_start = start.map(maybe_cast_ts);
16672 let cast_end = end.map(maybe_cast_ts);
16673 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16674 start: cast_start.map(Box::new),
16675 end: cast_end.map(Box::new),
16676 step: step.map(Box::new),
16677 is_end_exclusive: None,
16678 })))
16679 } else {
16680 Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
16681 start: start.map(Box::new),
16682 end: end.map(Box::new),
16683 step: step.map(Box::new),
16684 is_end_exclusive: None,
16685 })))
16686 }
16687 }
16688
            // TO_JSON(x) -> target-specific (source form used by Spark/Hive).
            // NOTE(review): the Presto/Trino and DuckDB branches keep only the first
            // argument — any extra arguments (e.g. a Spark options map) are dropped.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON)) renders the value as a JSON string.
                        let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom { name: "JSON".to_string() },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery's string-returning counterpart.
                        Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — force a plain string result.
                        let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Everything else keeps the original call unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
                }
            }
16724
            // TO_JSON_STRING(x) -> target-specific (BigQuery source form).
            // NOTE(review): as with TO_JSON above, the Presto/Trino and DuckDB
            // branches keep only the first argument; extras are dropped.
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Spark-family spells this TO_JSON.
                        Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON)) renders the value as a JSON string.
                        let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom { name: "JSON".to_string() },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new("JSON_FORMAT".to_string(), vec![cast_json]))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — force a plain string result.
                        let arg = args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new("TO_JSON".to_string(), vec![arg])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // Snowflake spells this TO_JSON.
                        Ok(Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))))
                    }
                    // Everything else keeps the original name.
                    _ => Ok(Expression::Function(Box::new(Function::new("TO_JSON_STRING".to_string(), args))))
                }
            }
16764
16765 // SAFE_ADD(x, y) -> SafeAdd expression
16766 "SAFE_ADD" if args.len() == 2 => {
16767 let x = args.remove(0);
16768 let y = args.remove(0);
16769 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
16770 this: Box::new(x),
16771 expression: Box::new(y),
16772 })))
16773 }
16774
16775 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
16776 "SAFE_SUBTRACT" if args.len() == 2 => {
16777 let x = args.remove(0);
16778 let y = args.remove(0);
16779 Ok(Expression::SafeSubtract(Box::new(crate::expressions::SafeSubtract {
16780 this: Box::new(x),
16781 expression: Box::new(y),
16782 })))
16783 }
16784
16785 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
16786 "SAFE_MULTIPLY" if args.len() == 2 => {
16787 let x = args.remove(0);
16788 let y = args.remove(0);
16789 Ok(Expression::SafeMultiply(Box::new(crate::expressions::SafeMultiply {
16790 this: Box::new(x),
16791 expression: Box::new(y),
16792 })))
16793 }
16794
16795 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
16796 "REGEXP_CONTAINS" if args.len() == 2 => {
16797 let str_expr = args.remove(0);
16798 let pattern = args.remove(0);
16799 Ok(Expression::RegexpLike(Box::new(crate::expressions::RegexpFunc {
16800 this: str_expr,
16801 pattern,
16802 flags: None,
16803 })))
16804 }
16805
16806 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
16807 "CONTAINS_SUBSTR" if args.len() == 2 => {
16808 let a = args.remove(0);
16809 let b = args.remove(0);
16810 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
16811 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
16812 Ok(Expression::Function(Box::new(Function::new(
16813 "CONTAINS".to_string(), vec![lower_a, lower_b],
16814 ))))
16815 }
16816
16817 // INT64(x) -> CAST(x AS BIGINT)
16818 "INT64" if args.len() == 1 => {
16819 let arg = args.remove(0);
16820 Ok(Expression::Cast(Box::new(Cast {
16821 this: arg,
16822 to: DataType::BigInt { length: None },
16823 trailing_comments: vec![],
16824 double_colon_syntax: false,
16825 format: None,
16826 default: None,
16827 })))
16828 }
16829
16830 // INSTR(str, substr) -> target-specific
16831 "INSTR" if args.len() >= 2 => {
16832 let str_expr = args.remove(0);
16833 let substr = args.remove(0);
16834 if matches!(target, DialectType::Snowflake) {
16835 // CHARINDEX(substr, str)
16836 Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
16837 } else if matches!(target, DialectType::BigQuery) {
16838 // Keep as INSTR
16839 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
16840 } else {
16841 // Default: keep as INSTR
16842 Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), vec![str_expr, substr]))))
16843 }
16844 }
16845
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for targets
            // using the standard (unit-first, quoted) argument order; all other
            // targets keep BigQuery's expr-first order with the bare unit expression.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                // Normalized unit name extracted from the unit identifier/literal.
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB | DialectType::Snowflake | DialectType::PostgreSQL
                    | DialectType::Presto | DialectType::Trino
                    | DialectType::Databricks | DialectType::Spark
                    | DialectType::Redshift | DialectType::ClickHouse | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
16872
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific truncation.
            //
            // Accepts (ts, unit) or (ts, unit, timezone); the unit is normalized to
            // a string via get_unit_str.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        // DAY and coarser units are treated as timezone-sensitive.
                        let is_coarse = matches!(unit_str.as_str(), "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR");
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp { precision: None, timezone: false },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                // Non-literals defer to the shared TIMESTAMPTZ-cast helper.
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                    this: cast_ts,
                                    zone: tz_arg.clone(),
                                }));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                    this: date_trunc,
                                    zone: tz_arg,
                                })))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(), vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        // NOTE(review): this emits TIMESTAMP_TRUNC with unit-first,
                        // quoted-string arg order even for DATETIME_TRUNC input and for
                        // a BigQuery target (whose native order is (ts, unit)) —
                        // confirm the generator restores the target's native shape.
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(), date_trunc_args,
                        ))))
                    }
                }
            }
16952
            // TIME(h, m, s) -> target-specific constructor;
            // TIME('string') -> CAST('string' AS TIME);
            // TIME(expr, timezone) -> time-of-day of expr in that zone.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0) — the appended zeros are the
                            // fractional-seconds value and its precision.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new("TIMEFROMPARTS".to_string(), args))))
                        }
                        DialectType::MySQL => {
                            Ok(Expression::Function(Box::new(Function::new("MAKETIME".to_string(), args))))
                        }
                        DialectType::PostgreSQL => {
                            Ok(Expression::Function(Box::new(Function::new("MAKE_TIME".to_string(), args))))
                        }
                        _ => Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time { precision: None, timezone: false },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp { timezone: true, precision: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time { precision: None, timezone: false },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Zero or 4+ args: pass the call through unchanged.
                    Ok(Expression::Function(Box::new(Function::new("TIME".to_string(), args))))
                }
            }
17023
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal = matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Re-express TIME 'x' as CAST('x' AS TIME) so the generator
                            // does not print a TIME literal inside DATETIME(...).
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(s)),
                                    to: DataType::Time { precision: None, timezone: false },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })),
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(), vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BQ -> BQ: everything else round-trips unchanged.
                    return Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp { timezone: false, precision: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal — that distinguishes the
                    // date+time form from the string+timezone form.
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => Expression::Literal(Literal::String(s)),
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time { precision: None, timezone: false },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE + TIME addition yields the combined timestamp.
                        let add_expr = Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp { timezone: true, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                            this: cast_tstz,
                            zone: second,
                        }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only Snowflake is actually handled — other targets
                    // keep the DATETIME name; confirm downstream generators cope.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new("TIMESTAMP_FROM_PARTS".to_string(), args))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
                    }
                } else {
                    // Zero args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new("DATETIME".to_string(), args))))
                }
            }
17137
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) (all targets, not
            // just Presto).
            // TIMESTAMP(x, tz) -> CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP)) on
            // Snowflake; CAST(x AS TIMESTAMP) AT TIME ZONE tz elsewhere (e.g. DuckDB).
            "TIMESTAMP" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp { timezone: true, precision: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Shared inner CAST(x AS TIMESTAMP) used by both branches below.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp { timezone: false, precision: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(), vec![tz, cast_ts],
                        ))))
                    } else {
                        Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                            this: cast_ts,
                            zone: tz,
                        })))
                    }
                } else {
                    // Zero or 3+ args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new("TIMESTAMP".to_string(), args))))
                }
            }
17177
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; everything else gets an unsized VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar { length: None, parenthesized_length: false },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar { length: None, parenthesized_length: false },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        // (source zone 'UTC', target zone tz, then stringify).
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![Expression::Literal(Literal::String("UTC".to_string())), tz, arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // The double AT TIME ZONE pins the input to UTC first, then
                        // shifts it into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_utc = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                            this: cast_ts,
                            zone: Expression::Literal(Literal::String("UTC".to_string())),
                        }));
                        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                            this: at_utc,
                            zone: tz,
                        }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else {
                    // Zero or 3+ args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new("STRING".to_string(), args))))
                }
            }
17247
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![cast_ts])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // NOTE(review): verify the plural unit spelling "SECONDS" is
                        // what the Snowflake generator expects for TIMESTAMPDIFF.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01 00:00:00+00".to_string())),
                            to: DataType::Timestamp { timezone: true, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(crate::expressions::TimestampDiff {
                            this: Box::new(epoch),
                            expression: Box::new(ts),
                            unit: Some("SECONDS".to_string()),
                        })))
                    }
                    // Other targets keep the BigQuery name unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new("UNIX_SECONDS".to_string(), vec![ts]))))
                }
            }
17284
17285 "UNIX_MILLIS" if args.len() == 1 => {
17286 let ts = args.remove(0);
17287 match target {
17288 DialectType::DuckDB => {
17289 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
17290 let cast_ts = Self::ensure_cast_timestamptz(ts);
17291 Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![cast_ts]))))
17292 }
17293 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MILLIS".to_string(), vec![ts]))))
17294 }
17295 }
17296
17297 "UNIX_MICROS" if args.len() == 1 => {
17298 let ts = args.remove(0);
17299 match target {
17300 DialectType::DuckDB => {
17301 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
17302 let cast_ts = Self::ensure_cast_timestamptz(ts);
17303 Ok(Expression::Function(Box::new(Function::new("EPOCH_US".to_string(), vec![cast_ts]))))
17304 }
17305 _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MICROS".to_string(), vec![ts]))))
17306 }
17307 }
17308
17309 // ARRAY_CONCAT -> target-specific
17310 "ARRAY_CONCAT" => {
17311 match target {
17312 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17313 // CONCAT(arr1, arr2, ...)
17314 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17315 }
17316 DialectType::Presto | DialectType::Trino => {
17317 // CONCAT(arr1, arr2, ...)
17318 Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
17319 }
17320 DialectType::Snowflake => {
17321 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17322 if args.len() == 1 {
17323 // ARRAY_CAT requires 2 args, add empty array as []
17324 let empty_arr = Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
17325 expressions: vec![],
17326 bracket_notation: true,
17327 use_list_keyword: false,
17328 }));
17329 let mut new_args = args;
17330 new_args.push(empty_arr);
17331 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), new_args))))
17332 } else if args.is_empty() {
17333 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17334 } else {
17335 let mut it = args.into_iter().rev();
17336 let mut result = it.next().unwrap();
17337 for arr in it {
17338 result = Expression::Function(Box::new(Function::new(
17339 "ARRAY_CAT".to_string(), vec![arr, result],
17340 )));
17341 }
17342 Ok(result)
17343 }
17344 }
17345 DialectType::PostgreSQL => {
17346 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
17347 if args.len() <= 1 {
17348 Ok(Expression::Function(Box::new(Function::new("ARRAY_CAT".to_string(), args))))
17349 } else {
17350 let mut it = args.into_iter().rev();
17351 let mut result = it.next().unwrap();
17352 for arr in it {
17353 result = Expression::Function(Box::new(Function::new(
17354 "ARRAY_CAT".to_string(), vec![arr, result],
17355 )));
17356 }
17357 Ok(result)
17358 }
17359 }
17360 DialectType::Redshift => {
17361 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17362 if args.len() <= 2 {
17363 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17364 } else {
17365 let mut it = args.into_iter().rev();
17366 let mut result = it.next().unwrap();
17367 for arr in it {
17368 result = Expression::Function(Box::new(Function::new(
17369 "ARRAY_CONCAT".to_string(), vec![arr, result],
17370 )));
17371 }
17372 Ok(result)
17373 }
17374 }
17375 DialectType::DuckDB => {
17376 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
17377 if args.len() <= 2 {
17378 Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17379 } else {
17380 let mut it = args.into_iter().rev();
17381 let mut result = it.next().unwrap();
17382 for arr in it {
17383 result = Expression::Function(Box::new(Function::new(
17384 "ARRAY_CONCAT".to_string(), vec![arr, result],
17385 )));
17386 }
17387 Ok(result)
17388 }
17389 }
17390 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT".to_string(), args))))
17391 }
17392 }
17393
17394 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
17395 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
17396 let arg = args.remove(0);
17397 match target {
17398 DialectType::Snowflake => {
17399 let array_agg = Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
17400 this: arg,
17401 distinct: false,
17402 filter: None,
17403 order_by: vec![],
17404 name: None,
17405 ignore_nulls: None,
17406 having_max: None,
17407 limit: None,
17408 }));
17409 Ok(Expression::Function(Box::new(Function::new(
17410 "ARRAY_FLATTEN".to_string(), vec![array_agg],
17411 ))))
17412 }
17413 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_CONCAT_AGG".to_string(), vec![arg]))))
17414 }
17415 }
17416
            // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions.
            // The wrappers below (UNHEX / *_BINARY) keep the result binary;
            // presumably the source dialect's hash functions return BYTES while
            // these targets return hex strings by default — TODO confirm against
            // the transpilation test-suite.
            "MD5" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Spark-family MD5 returns a hex string; UNHEX(MD5(x))
                        // converts it back to binary.
                        let md5 = Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg])));
                        Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![md5]))))
                    }
                    DialectType::Snowflake => {
                        // Snowflake's binary-returning variant: MD5_BINARY(x).
                        Ok(Expression::Function(Box::new(Function::new("MD5_BINARY".to_string(), vec![arg]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("MD5".to_string(), vec![arg]))))
                }
            }

            "SHA1" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // DuckDB SHA1 returns hex; wrap in UNHEX for binary output.
                        let sha1 = Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg])));
                        Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha1]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("SHA1".to_string(), vec![arg]))))
                }
            }

            "SHA256" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // DuckDB: UNHEX(SHA256(x)) for a binary result.
                        let sha = Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg])));
                        Ok(Expression::Function(Box::new(Function::new("UNHEX".to_string(), vec![sha]))))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: SHA2_BINARY(x, 256) — digest size passed as arg.
                        Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(256)]))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // Redshift/Spark expose the SHA-2 family as SHA2(x, bits).
                        Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(256)]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("SHA256".to_string(), vec![arg]))))
                }
            }

            "SHA512" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // Snowflake: SHA2_BINARY(x, 512).
                        Ok(Expression::Function(Box::new(Function::new("SHA2_BINARY".to_string(), vec![arg, Expression::number(512)]))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // Redshift/Spark: SHA2(x, 512).
                        Ok(Expression::Function(Box::new(Function::new("SHA2".to_string(), vec![arg, Expression::number(512)]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("SHA512".to_string(), vec![arg]))))
                }
            }
17480
            // REGEXP_EXTRACT_ALL(str, pattern): dialects disagree on the default
            // capture-group index, so add or omit an explicit group argument
            // depending on the target and on whether the pattern has groups.
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Heuristic: a string-literal pattern containing both '(' and ')'
                // is assumed to hold a capturing group. NOTE(review): this also
                // fires on non-capturing groups like (?:...) and on escaped
                // parens — confirm this matches the intended behavior.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: group 1 extracts the capture, group 0 the whole match.
                        let group = if has_groups { Expression::number(1) } else { Expression::number(0) };
                        Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, group]))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
                        } else {
                            // No groups: pass 0 explicitly to extract the full match.
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(0)]))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino default to the full match; pass group 1
                        // explicitly only when a capturing group is present.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern, Expression::number(1)]))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // Snowflake signature: (subject, pattern, position,
                            // occurrence, regex_parameters, group_num). Start at
                            // position 1, every occurrence from the 1st, 'c' =
                            // case-sensitive, extract capture group 1.
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![
                                str_expr, pattern, Expression::number(1), Expression::number(1),
                                Expression::Literal(Literal::String("c".to_string())), Expression::number(1),
                            ]))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
                        }
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![str_expr, pattern]))))
                }
            }
17526
            // MOD(x, y) -> the `%` binary operator for PostgreSQL/DuckDB,
            // where the operator form is the idiomatic spelling.
            "MOD" if args.len() == 2 => {
                match target {
                    DialectType::PostgreSQL | DialectType::DuckDB => {
                        let x = args.remove(0);
                        let y = args.remove(0);
                        Ok(Expression::Mod(Box::new(crate::expressions::BinaryOp::new(x, y))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("MOD".to_string(), args))))
                }
            }

            // CONCAT(a, b, ...) with 3+ args -> a || b || ... for DuckDB.
            // NOTE(review): `||` propagates NULL whereas some dialects' CONCAT
            // skips NULL arguments — confirm this matches the intended
            // NULL semantics for the source dialect.
            "CONCAT" if args.len() > 2 => {
                match target {
                    DialectType::DuckDB => {
                        // Left-fold the arguments into a chain of DPipe (`||`) nodes.
                        let mut it = args.into_iter();
                        let mut result = it.next().unwrap();
                        for arg in it {
                            result = Expression::DPipe(Box::new(crate::expressions::DPipe { this: Box::new(result), expression: Box::new(arg), safe: None }));
                        }
                        Ok(result)
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("CONCAT".to_string(), args))))
                }
            }
17553
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific forms.
            // BigQuery/Snowflake keep the function (with the implicit
            // INTERVAL '1' DAY step made explicit); DuckDB gets a CAST-wrapped
            // GENERATE_SERIES; everything else gets a bare GenerateSeries node.
            "GENERATE_DATE_ARRAY" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: add default interval if not present
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // BigQuery's implicit step is one day; spell it out.
                        let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String("1".to_string()))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
                    }
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing third arg defaults to INTERVAL '1' DAY.
                    let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String("1".to_string()))),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                    }))));

                    // Wrap start/end in CAST(... AS DATE) only for string literals;
                    // non-literal expressions are assumed to already be date-typed.
                    let maybe_cast_date = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_date);
                    let cast_end = end.map(maybe_cast_date);

                    let gen_series = Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                        start: cast_start.map(Box::new),
                        end: cast_end.map(Box::new),
                        step: step.map(Box::new),
                        is_end_exclusive: None,
                    }));

                    // GENERATE_SERIES yields timestamps; CAST(... AS DATE[])
                    // brings the element type back to DATE.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array { element_type: Box::new(DataType::Date), dimension: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
                    // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        let default_interval = Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String("1".to_string()))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                        }));
                        Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), vec![start, end, default_interval]))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new("GENERATE_DATE_ARRAY".to_string(), args))))
                    }
                } else {
                    // Convert to GenerateSeries for other targets, again
                    // defaulting the step to INTERVAL '1' DAY.
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| Some(Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String("1".to_string()))),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: crate::expressions::IntervalUnit::Day, use_plural: false }),
                    }))));
                    Ok(Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                        start: start.map(Box::new),
                        end: end.map(Box::new),
                        step: step.map(Box::new),
                        is_end_exclusive: None,
                    })))
                }
            }
17642
            // PARSE_DATE(format, str) -> target-specific parse. Note the
            // argument order flips for DuckDB's STRPTIME (string first).
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE); the format
                        // string is translated to DuckDB's strftime specifiers.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new("_POLYGLOT_DATE".to_string(), vec![str_expr, sf_format]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("PARSE_DATE".to_string(), vec![format, str_expr]))))
                }
            }

            // PARSE_TIMESTAMP(format, str[, timezone]) -> target-specific parse.
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument is a timezone name.
                let tz = if !args.is_empty() { Some(args.remove(0)) } else { None };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format). NOTE(review): the timezone
                        // argument is dropped for DuckDB — confirm that is intended.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new("STRPTIME".to_string(), vec![str_expr, duck_format])));
                        Ok(strptime)
                    }
                    _ => {
                        // Pass through, re-appending the timezone when present.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz { result_args.push(tz_arg); }
                        Ok(Expression::Function(Box::new(Function::new("PARSE_TIMESTAMP".to_string(), result_args))))
                    }
                }
            }
17689
            // FORMAT_DATE(format, date) -> target-specific formatting.
            // Argument order flips for DuckDB's STRFTIME (value first).
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format) — the cast forces
                        // date-typed input even for string literals.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_date, format]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATE".to_string(), vec![format, date_expr]))))
                }
            }

            // FORMAT_DATETIME(format, datetime) -> target-specific formatting.
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also rewrite a typed timestamp literal into an explicit
                    // CAST(... AS DATETIME) so the DATETIME keyword is dropped.
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom { name: "DATETIME".to_string() },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![norm_format, norm_dt]))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_dt, duck_format]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_DATETIME".to_string(), vec![format, dt_expr]))))
                }
            }

            // FORMAT_TIMESTAMP(format, ts) -> target-specific formatting.
            // Both special-cased targets double-cast (TIMESTAMPTZ then
            // TIMESTAMP) so the value is interpreted as an absolute instant
            // before being rendered as a naive timestamp.
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new("STRFTIME".to_string(), vec![cast_ts, format]))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new("TO_CHAR".to_string(), vec![cast_ts, sf_format]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("FORMAT_TIMESTAMP".to_string(), vec![format, ts_expr]))))
                }
            }
17782
            // UNIX_DATE(date): days since the Unix epoch. For DuckDB emit
            // DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), date).
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(), vec![
                                Expression::Literal(Literal::String("DAY".to_string())),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("UNIX_DATE".to_string(), vec![date]))))
                }
            }

            // UNIX_SECONDS(ts): whole seconds since the Unix epoch.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT); the
                        // outer cast truncates EPOCH's DOUBLE result to integer.
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new("EPOCH".to_string(), vec![norm_ts])));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01 00:00:00+00".to_string())),
                            to: DataType::Timestamp { timezone: true, precision: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // The unit is emitted as a bare identifier (SECONDS),
                        // not a string literal, to match Snowflake syntax.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(), vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("UNIX_SECONDS".to_string(), vec![ts]))))
                }
            }

            // UNIX_MILLIS(ts): milliseconds since epoch -> DuckDB EPOCH_MS.
            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new("EPOCH_MS".to_string(), vec![norm_ts]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MILLIS".to_string(), vec![ts]))))
                }
            }

            // UNIX_MICROS(ts): microseconds since epoch -> DuckDB EPOCH_US.
            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new("EPOCH_US".to_string(), vec![norm_ts]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("UNIX_MICROS".to_string(), vec![ts]))))
                }
            }
17873
            // INSTR(str, substr) -> target-specific position function.
            "INSTR" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as INSTR unchanged (any arity).
                    Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
                } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
                    // Snowflake: CHARINDEX(substr, str) — note the argument
                    // order is swapped relative to INSTR.
                    let str_expr = args.remove(0);
                    let substr = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new("CHARINDEX".to_string(), vec![substr, str_expr]))))
                } else {
                    // Keep as INSTR for other targets (and for Snowflake with
                    // a non-2 arity, which CHARINDEX cannot express).
                    Ok(Expression::Function(Box::new(Function::new("INSTR".to_string(), args))))
                }
            }
17889
            // CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_DATETIME / CURRENT_TIME:
            // normalize parenthesized vs bare forms per target, and expand
            // CURRENT_DATE('tz') (a timezone-qualified current date) where the
            // target has no direct equivalent.
            "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: always output with parens (function form), keep any timezone arg
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                } else if name == "CURRENT_DATE" && args.len() == 1 {
                    // CURRENT_DATE('UTC') - has timezone arg
                    let tz_arg = args.remove(0);
                    match target {
                        DialectType::DuckDB => {
                            // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                            let ct = Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp { precision: None, sysdate: false });
                            let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: ct,
                                zone: tz_arg,
                            }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: at_tz,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                            let ct = Expression::Function(Box::new(Function::new("CURRENT_TIMESTAMP".to_string(), vec![])));
                            let convert = Expression::Function(Box::new(Function::new("CONVERT_TIMEZONE".to_string(), vec![tz_arg, ct])));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: convert,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        _ => {
                            // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                            // NOTE(review): AT TIME ZONE applied to a DATE may not be
                            // valid in every fallback dialect — confirm coverage.
                            let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                            Ok(Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cd,
                                zone: tz_arg,
                            })))
                        }
                    }
                } else if (name == "CURRENT_TIMESTAMP" || name == "CURRENT_TIME" || name == "CURRENT_DATE") && args.is_empty()
                    && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB | DialectType::Presto | DialectType::Trino)
                {
                    // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME,
                    // so swap the Function node for the dedicated AST variants.
                    if name == "CURRENT_TIMESTAMP" {
                        Ok(Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        }))
                    } else if name == "CURRENT_DATE" {
                        Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
                    } else {
                        // CURRENT_TIME
                        Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                            precision: None,
                        }))
                    }
                } else {
                    // All other targets: keep as function (with parens)
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                }
            }
17959
            // JSON_QUERY(json, path) -> target-specific JSON extraction.
            "JSON_QUERY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB | DialectType::SQLite => {
                        // Arrow operator form: json -> path.
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        Ok(Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                            this: json_expr,
                            path,
                            returning: None,
                            arrow_syntax: true,
                            hash_arrow_syntax: false,
                            wrapper_option: None,
                            quotes_option: None,
                            on_scalar_string: false,
                            on_error: None,
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive-family equivalent: GET_JSON_OBJECT(json, path).
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(), args,
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): JSON_EXTRACT_PATH takes path *elements*,
                        // not a '$.a.b' JSONPath string — the path argument is
                        // passed through unmodified here; confirm a later
                        // transform rewrites it, or that callers pass elements.
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH".to_string(), args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("JSON_QUERY".to_string(), args))))
                }
            }
17992
            // JSON_VALUE_ARRAY(json, path): extract a JSON array and return it
            // as an array of strings.
            "JSON_VALUE_ARRAY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        // CAST(json -> path AS TEXT[])
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        let arrow = Expression::JsonExtract(Box::new(crate::expressions::JsonExtractFunc {
                            this: json_expr,
                            path,
                            returning: None,
                            arrow_syntax: true,
                            hash_arrow_syntax: false,
                            wrapper_option: None,
                            quotes_option: None,
                            on_scalar_string: false,
                            on_error: None,
                        }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arrow,
                            to: DataType::Array { element_type: Box::new(DataType::Text), dimension: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
                        let json_expr = args.remove(0);
                        let path_expr = args.remove(0);
                        // GET_PATH wants a bare path ('a.b'), so strip the
                        // leading '$' and '.' from a JSONPath string literal.
                        let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr {
                            let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                            Expression::Literal(Literal::String(trimmed.to_string()))
                        } else {
                            path_expr
                        };
                        let parse_json = Expression::Function(Box::new(Function::new("PARSE_JSON".to_string(), vec![json_expr])));
                        let get_path = Expression::Function(Box::new(Function::new("GET_PATH".to_string(), vec![parse_json, sf_path])));
                        // Element-wise cast each value to VARCHAR via a lambda.
                        let cast_expr = Expression::Cast(Box::new(Cast {
                            this: Expression::Identifier(Identifier::new("x")),
                            to: DataType::VarChar { length: None, parenthesized_length: false },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![Identifier::new("x")],
                            body: cast_expr,
                            colon: false,
                            parameter_types: vec![],
                        }));
                        Ok(Expression::Function(Box::new(Function::new("TRANSFORM".to_string(), vec![get_path, lambda]))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new("JSON_VALUE_ARRAY".to_string(), args))))
                }
            }
18052
            // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
            // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
            // This is different from Hive/Spark where 3rd arg is "group_index"
            "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
                match target {
                    DialectType::DuckDB | DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        if args.len() == 2 {
                            // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                            args.push(Expression::number(1));
                            Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
                        } else if args.len() == 3 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            // Literal position 1 is the default: no offsetting needed.
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
                            } else {
                                // Emulate the starting position by slicing first;
                                // NULLIF(..., '') makes an out-of-range slice
                                // yield NULL like BigQuery does.
                                let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
                                let nullif_expr = Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])));
                                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![nullif_expr, regex, Expression::number(1)]))))
                            }
                        } else if args.len() == 4 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 && is_occ_1 {
                                // Both defaults: degenerate to the simple 3-arg form.
                                Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), vec![val, regex, Expression::number(1)]))))
                            } else {
                                // Non-default position: slice first (as above).
                                let subject = if is_pos_1 {
                                    val
                                } else {
                                    let substring_expr = Expression::Function(Box::new(Function::new("SUBSTRING".to_string(), vec![val, position])));
                                    Expression::Function(Box::new(Function::new("NULLIF".to_string(), vec![substring_expr, Expression::Literal(Literal::String(String::new()))])))
                                };
                                // Emulate "occurrence": collect all matches and
                                // index into the result array.
                                let extract_all = Expression::Function(Box::new(Function::new("REGEXP_EXTRACT_ALL".to_string(), vec![subject, regex, Expression::number(1)])));
                                Ok(Expression::Function(Box::new(Function::new("ARRAY_EXTRACT".to_string(), vec![extract_all, occurrence]))))
                            }
                        } else {
                            // Unexpected arity: rebuild the original call, preserving
                            // all Function flags (distinct, brackets, quoting, ...).
                            Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
                        }
                    }
                    DialectType::Snowflake => {
                        // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR.
                        // NOTE(review): Snowflake's 3rd/4th args are also
                        // position/occurrence, so args pass through unchanged.
                        Ok(Expression::Function(Box::new(Function::new("REGEXP_SUBSTR".to_string(), args))))
                    }
                    _ => {
                        // For other targets (Hive/Spark/BigQuery): pass through as-is
                        // BigQuery's default group behavior matches Hive/Spark for 2-arg case
                        Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
                    }
                }
            }
18109
            // BigQuery STRUCT(args) -> target-specific struct expression.
            // First normalize the argument list into (optional name, value)
            // fields, then render per target.
            "STRUCT" => {
                // Convert Function args to Struct fields
                let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
                for (i, arg) in args.into_iter().enumerate() {
                    match arg {
                        Expression::Alias(a) => {
                            // Named field: expr AS name
                            fields.push((Some(a.alias.name.clone()), a.this));
                        }
                        other => {
                            // Unnamed field: for Spark/Hive, keep as None
                            // For Snowflake, auto-name as _N
                            // For DuckDB, use column name for column refs, _N for others
                            // (auto-names are positional, zero-based: _0, _1, ...)
                            if matches!(target, DialectType::Snowflake) {
                                fields.push((Some(format!("_{}", i)), other));
                            } else if matches!(target, DialectType::DuckDB) {
                                let auto_name = match &other {
                                    Expression::Column(col) => col.name.name.clone(),
                                    _ => format!("_{}", i),
                                };
                                fields.push((Some(auto_name), other));
                            } else {
                                fields.push((None, other));
                            }
                        }
                    }
                }

                match target {
                    DialectType::Snowflake => {
                        // OBJECT_CONSTRUCT('name', value, ...) — alternating
                        // key/value arguments; an unnamed field contributes the
                        // bare value only (every field is named above for
                        // Snowflake, so that branch is effectively defensive).
                        let mut oc_args = Vec::new();
                        for (name, val) in &fields {
                            if let Some(n) = name {
                                oc_args.push(Expression::Literal(Literal::String(n.clone())));
                                oc_args.push(val.clone());
                            } else {
                                oc_args.push(val.clone());
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new("OBJECT_CONSTRUCT".to_string(), oc_args))))
                    }
                    DialectType::DuckDB => {
                        // Struct literal syntax: {'name': value, ...}
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
                    }
                    DialectType::Hive => {
                        // STRUCT(val1, val2, ...) - strip aliases
                        // (Hive's struct() does not accept field names).
                        let hive_fields: Vec<(Option<String>, Expression)> = fields.into_iter().map(|(_, v)| (None, v)).collect();
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields: hive_fields })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Use Expression::Struct to bypass Spark target transform auto-naming
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct { fields })))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
                        let all_named = !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
                        let all_types_inferable = all_named && fields.iter().all(|(_, val)| Self::can_infer_presto_type(val));
                        let row_args: Vec<Expression> = fields.iter().map(|(_, v)| v.clone()).collect();
                        let row_expr = Expression::Function(Box::new(Function::new("ROW".to_string(), row_args)));
                        if all_named && all_types_inferable {
                            // Build ROW type with inferred types
                            let mut row_type_fields = Vec::new();
                            for (name, val) in &fields {
                                if let Some(n) = name {
                                    let type_str = Self::infer_sql_type_for_presto(val);
                                    row_type_fields.push(crate::expressions::StructField::new(
                                        n.clone(),
                                        crate::expressions::DataType::Custom { name: type_str },
                                    ));
                                }
                            }
                            let row_type = crate::expressions::DataType::Struct { fields: row_type_fields, nested: true };
                            Ok(Expression::Cast(Box::new(Cast {
                                this: row_expr,
                                to: row_type,
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        } else {
                            // Otherwise emit a bare ROW(...): field names are lost.
                            Ok(row_expr)
                        }
                    }
                    _ => {
                        // Default: keep as STRUCT function with original args,
                        // reconstructing `expr AS name` aliases for named fields.
                        let mut new_args = Vec::new();
                        for (name, val) in fields {
                            if let Some(n) = name {
                                new_args.push(Expression::Alias(Box::new(crate::expressions::Alias::new(
                                    val, Identifier::new(n),
                                ))));
                            } else {
                                new_args.push(val);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new("STRUCT".to_string(), new_args))))
                    }
                }
            }
18213
18214 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
18215 "ROUND" if args.len() == 3 => {
18216 let x = args.remove(0);
18217 let n = args.remove(0);
18218 let mode = args.remove(0);
18219 // Check if mode is 'ROUND_HALF_EVEN'
18220 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
18221 if is_half_even && matches!(target, DialectType::DuckDB) {
18222 Ok(Expression::Function(Box::new(Function::new("ROUND_EVEN".to_string(), vec![x, n]))))
18223 } else {
18224 // Pass through with all args
18225 Ok(Expression::Function(Box::new(Function::new("ROUND".to_string(), vec![x, n, mode]))))
18226 }
18227 }
18228
18229 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
18230 "MAKE_INTERVAL" => {
18231 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
18232 // The positional args are: year, month
18233 // Named args are: day =>, minute =>, etc.
18234 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
18235 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
18236 // For BigQuery->BigQuery: reorder named args (day before minute)
18237 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
18238 let mut parts: Vec<(String, String)> = Vec::new();
18239 let mut pos_idx = 0;
18240 let pos_units = ["year", "month"];
18241 for arg in &args {
18242 if let Expression::NamedArgument(na) = arg {
18243 // Named arg like minute => 5
18244 let unit = na.name.name.clone();
18245 if let Expression::Literal(Literal::Number(n)) = &na.value {
18246 parts.push((unit, n.clone()));
18247 }
18248 } else if pos_idx < pos_units.len() {
18249 if let Expression::Literal(Literal::Number(n)) = arg {
18250 parts.push((pos_units[pos_idx].to_string(), n.clone()));
18251 }
18252 pos_idx += 1;
18253 }
18254 }
18255 // Don't sort - preserve original argument order
18256 let separator = if matches!(target, DialectType::Snowflake) { ", " } else { " " };
18257 let interval_str = parts.iter()
18258 .map(|(u, v)| format!("{} {}", v, u))
18259 .collect::<Vec<_>>()
18260 .join(separator);
18261 Ok(Expression::Interval(Box::new(crate::expressions::Interval {
18262 this: Some(Expression::Literal(Literal::String(interval_str))),
18263 unit: None,
18264 })))
18265 } else if matches!(target, DialectType::BigQuery) {
18266 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
18267 let mut positional = Vec::new();
18268 let mut named: Vec<(String, Expression, crate::expressions::NamedArgSeparator)> = Vec::new();
18269 let _pos_units = ["year", "month"];
18270 let mut _pos_idx = 0;
18271 for arg in args {
18272 if let Expression::NamedArgument(na) = arg {
18273 named.push((na.name.name.clone(), na.value, na.separator));
18274 } else {
18275 positional.push(arg);
18276 _pos_idx += 1;
18277 }
18278 }
18279 // Sort named args by: day, hour, minute, second
18280 let unit_order = |u: &str| -> usize {
18281 match u.to_lowercase().as_str() {
18282 "day" => 0, "hour" => 1, "minute" => 2, "second" => 3, _ => 4,
18283 }
18284 };
18285 named.sort_by_key(|(u, _, _)| unit_order(u));
18286 let mut result_args = positional;
18287 for (name, value, sep) in named {
18288 result_args.push(Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
18289 name: Identifier::new(&name),
18290 value,
18291 separator: sep,
18292 })));
18293 }
18294 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), result_args))))
18295 } else {
18296 Ok(Expression::Function(Box::new(Function::new("MAKE_INTERVAL".to_string(), args))))
18297 }
18298 }
18299
18300 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
18301 "ARRAY_TO_STRING" if args.len() == 3 => {
18302 let arr = args.remove(0);
18303 let sep = args.remove(0);
18304 let null_text = args.remove(0);
18305 match target {
18306 DialectType::DuckDB => {
18307 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
18308 let _lambda_param = Expression::Identifier(crate::expressions::Identifier::new("x"));
18309 let coalesce = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
18310 original_name: None,
18311 expressions: vec![
18312 Expression::Identifier(crate::expressions::Identifier::new("x")),
18313 null_text,
18314 ],
18315 }));
18316 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
18317 parameters: vec![crate::expressions::Identifier::new("x")],
18318 body: coalesce,
18319 colon: false,
18320 parameter_types: vec![],
18321 }));
18322 let list_transform = Expression::Function(Box::new(Function::new("LIST_TRANSFORM".to_string(), vec![arr, lambda])));
18323 Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![list_transform, sep]))))
18324 }
18325 _ => Ok(Expression::Function(Box::new(Function::new("ARRAY_TO_STRING".to_string(), vec![arr, sep, null_text]))))
18326 }
18327 }
18328
18329 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
18330 "LENGTH" if args.len() == 1 => {
18331 let arg = args.remove(0);
18332 match target {
18333 DialectType::DuckDB => {
18334 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
18335 let typeof_func = Expression::Function(Box::new(Function::new("TYPEOF".to_string(), vec![arg.clone()])));
18336 let blob_cast = Expression::Cast(Box::new(Cast {
18337 this: arg.clone(),
18338 to: DataType::VarBinary { length: None },
18339 trailing_comments: vec![],
18340 double_colon_syntax: false,
18341 format: None,
18342 default: None,
18343 }));
18344 let octet_length = Expression::Function(Box::new(Function::new("OCTET_LENGTH".to_string(), vec![blob_cast])));
18345 let text_cast = Expression::Cast(Box::new(Cast {
18346 this: arg,
18347 to: DataType::Text,
18348 trailing_comments: vec![],
18349 double_colon_syntax: false,
18350 format: None,
18351 default: None,
18352 }));
18353 let length_text = Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![text_cast])));
18354 Ok(Expression::Case(Box::new(crate::expressions::Case {
18355 operand: Some(typeof_func),
18356 whens: vec![(Expression::Literal(Literal::String("BLOB".to_string())), octet_length)],
18357 else_: Some(length_text),
18358 })))
18359 }
18360 _ => Ok(Expression::Function(Box::new(Function::new("LENGTH".to_string(), vec![arg]))))
18361 }
18362 }
18363
18364 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
18365 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
18366 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
18367 // The args should be [x, fraction] with the null handling stripped
18368 // For DuckDB: QUANTILE_CONT(x, fraction)
18369 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
18370 match target {
18371 DialectType::DuckDB => {
18372 // Strip down to just 2 args, rename to QUANTILE_CONT
18373 let x = args[0].clone();
18374 let frac = args[1].clone();
18375 Ok(Expression::Function(Box::new(Function::new("QUANTILE_CONT".to_string(), vec![x, frac]))))
18376 }
18377 _ => Ok(Expression::Function(Box::new(Function::new("PERCENTILE_CONT".to_string(), args))))
18378 }
18379 }
18380
18381 // All others: pass through
18382 _ => Ok(Expression::Function(Box::new(Function { name: f.name, args, distinct: f.distinct, trailing_comments: f.trailing_comments, use_bracket_syntax: f.use_bracket_syntax, no_parens: f.no_parens, quoted: f.quoted })))
18383 }
18384 }
18385
18386 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
18387 /// Returns false for column references and other non-literal expressions where the type is unknown.
18388 fn can_infer_presto_type(expr: &Expression) -> bool {
18389 match expr {
18390 Expression::Literal(_) => true,
18391 Expression::Boolean(_) => true,
18392 Expression::Array(_) | Expression::ArrayFunc(_) => true,
18393 Expression::Struct(_) | Expression::StructFunc(_) => true,
18394 Expression::Function(f) => {
18395 let up = f.name.to_uppercase();
18396 up == "STRUCT" || up == "ROW" || up == "CURRENT_DATE" || up == "CURRENT_TIMESTAMP" || up == "NOW"
18397 }
18398 Expression::Cast(_) => true,
18399 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
18400 _ => false,
18401 }
18402 }
18403
18404 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
18405 fn infer_sql_type_for_presto(expr: &Expression) -> String {
18406 use crate::expressions::Literal;
18407 match expr {
18408 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
18409 Expression::Literal(Literal::Number(n)) => {
18410 if n.contains('.') { "DOUBLE".to_string() } else { "INTEGER".to_string() }
18411 }
18412 Expression::Boolean(_) => "BOOLEAN".to_string(),
18413 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
18414 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
18415 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
18416 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
18417 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
18418 Expression::Function(f) => {
18419 let up = f.name.to_uppercase();
18420 if up == "STRUCT" || up == "ROW" { "ROW".to_string() }
18421 else if up == "CURRENT_DATE" { "DATE".to_string() }
18422 else if up == "CURRENT_TIMESTAMP" || up == "NOW" { "TIMESTAMP".to_string() }
18423 else { "VARCHAR".to_string() }
18424 }
18425 Expression::Cast(c) => {
18426 // If already cast, use the target type
18427 Self::data_type_to_presto_string(&c.to)
18428 }
18429 _ => "VARCHAR".to_string(),
18430 }
18431 }
18432
18433 /// Convert a DataType to its Presto/Trino string representation for ROW type
18434 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
18435 use crate::expressions::DataType;
18436 match dt {
18437 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => "VARCHAR".to_string(),
18438 DataType::Int { .. } | DataType::BigInt { .. } | DataType::SmallInt { .. } | DataType::TinyInt { .. } => "INTEGER".to_string(),
18439 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
18440 DataType::Boolean => "BOOLEAN".to_string(),
18441 DataType::Date => "DATE".to_string(),
18442 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
18443 DataType::Struct { fields, .. } => {
18444 let field_strs: Vec<String> = fields.iter().map(|f| {
18445 format!("{} {}", f.name, Self::data_type_to_presto_string(&f.data_type))
18446 }).collect();
18447 format!("ROW({})", field_strs.join(", "))
18448 }
18449 DataType::Array { element_type, .. } => {
18450 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
18451 }
18452 DataType::Custom { name } => {
18453 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
18454 name.clone()
18455 }
18456 _ => "VARCHAR".to_string(),
18457 }
18458 }
18459
18460 /// Convert IntervalUnit to string
18461 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
18462 match unit {
18463 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
18464 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
18465 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
18466 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
18467 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
18468 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
18469 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
18470 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
18471 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
18472 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
18473 }
18474 }
18475
18476 /// Extract unit string from an expression (uppercased)
18477 fn get_unit_str_static(expr: &Expression) -> String {
18478 use crate::expressions::Literal;
18479 match expr {
18480 Expression::Identifier(id) => id.name.to_uppercase(),
18481 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
18482 Expression::Column(col) => col.name.name.to_uppercase(),
18483 Expression::Function(f) => {
18484 let base = f.name.to_uppercase();
18485 if !f.args.is_empty() {
18486 let inner = Self::get_unit_str_static(&f.args[0]);
18487 format!("{}({})", base, inner)
18488 } else {
18489 base
18490 }
18491 }
18492 _ => "DAY".to_string(),
18493 }
18494 }
18495
18496 /// Parse unit string to IntervalUnit
18497 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
18498 match s {
18499 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
18500 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
18501 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
18502 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
18503 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
18504 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
18505 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
18506 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
18507 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
18508 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
18509 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
18510 _ => crate::expressions::IntervalUnit::Day,
18511 }
18512 }
18513
18514 /// Convert expression to simple string for interval building
18515 fn expr_to_string_static(expr: &Expression) -> String {
18516 use crate::expressions::Literal;
18517 match expr {
18518 Expression::Literal(Literal::Number(s)) => s.clone(),
18519 Expression::Literal(Literal::String(s)) => s.clone(),
18520 Expression::Identifier(id) => id.name.clone(),
18521 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
18522 _ => "1".to_string(),
18523 }
18524 }
18525
18526 /// Extract a simple string representation from a literal expression
18527 fn expr_to_string(expr: &Expression) -> String {
18528 use crate::expressions::Literal;
18529 match expr {
18530 Expression::Literal(Literal::Number(s)) => s.clone(),
18531 Expression::Literal(Literal::String(s)) => s.clone(),
18532 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
18533 Expression::Identifier(id) => id.name.clone(),
18534 _ => "1".to_string(),
18535 }
18536 }
18537
18538 /// Quote an interval value expression as a string literal if it's a number (or negated number)
18539 fn quote_interval_val(expr: &Expression) -> Expression {
18540 use crate::expressions::Literal;
18541 match expr {
18542 Expression::Literal(Literal::Number(n)) => {
18543 Expression::Literal(Literal::String(n.clone()))
18544 }
18545 Expression::Literal(Literal::String(_)) => expr.clone(),
18546 Expression::Neg(inner) => {
18547 if let Expression::Literal(Literal::Number(n)) = &inner.this {
18548 Expression::Literal(Literal::String(format!("-{}", n)))
18549 } else {
18550 expr.clone()
18551 }
18552 }
18553 _ => expr.clone(),
18554 }
18555 }
18556
18557 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
18558 fn timestamp_string_has_timezone(ts: &str) -> bool {
18559 let trimmed = ts.trim();
18560 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
18561 if let Some(last_space) = trimmed.rfind(' ') {
18562 let suffix = &trimmed[last_space + 1..];
18563 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
18564 let rest = &suffix[1..];
18565 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
18566 return true;
18567 }
18568 }
18569 }
18570 // Check for named timezone abbreviations
18571 let ts_lower = trimmed.to_lowercase();
18572 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
18573 for abbrev in &tz_abbrevs {
18574 if ts_lower.ends_with(abbrev) {
18575 return true;
18576 }
18577 }
18578 false
18579 }
18580
18581 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
18582 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
18583 use crate::expressions::{Cast, DataType, Literal};
18584 match expr {
18585 Expression::Literal(Literal::Timestamp(s)) => {
18586 let tz = func_name.starts_with("TIMESTAMP");
18587 Expression::Cast(Box::new(Cast {
18588 this: Expression::Literal(Literal::String(s)),
18589 to: if tz {
18590 DataType::Timestamp { timezone: true, precision: None }
18591 } else {
18592 DataType::Timestamp { timezone: false, precision: None }
18593 },
18594 trailing_comments: vec![],
18595 double_colon_syntax: false,
18596 format: None,
18597 default: None,
18598 }))
18599 }
18600 other => other,
18601 }
18602 }
18603
18604 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
18605 fn maybe_cast_ts(expr: Expression) -> Expression {
18606 use crate::expressions::{Cast, DataType, Literal};
18607 match expr {
18608 Expression::Literal(Literal::Timestamp(s)) => {
18609 Expression::Cast(Box::new(Cast {
18610 this: Expression::Literal(Literal::String(s)),
18611 to: DataType::Timestamp { timezone: false, precision: None },
18612 trailing_comments: vec![],
18613 double_colon_syntax: false,
18614 format: None,
18615 default: None,
18616 }))
18617 }
18618 other => other,
18619 }
18620 }
18621
18622 /// Convert DATE 'x' literal to CAST('x' AS DATE)
18623 fn date_literal_to_cast(expr: Expression) -> Expression {
18624 use crate::expressions::{Cast, DataType, Literal};
18625 match expr {
18626 Expression::Literal(Literal::Date(s)) => {
18627 Expression::Cast(Box::new(Cast {
18628 this: Expression::Literal(Literal::String(s)),
18629 to: DataType::Date,
18630 trailing_comments: vec![],
18631 double_colon_syntax: false,
18632 format: None,
18633 default: None,
18634 }))
18635 }
18636 other => other,
18637 }
18638 }
18639
18640 /// Ensure an expression that should be a date is CAST(... AS DATE).
18641 /// Handles both DATE literals and string literals that look like dates.
18642 fn ensure_cast_date(expr: Expression) -> Expression {
18643 use crate::expressions::{Cast, DataType, Literal};
18644 match expr {
18645 Expression::Literal(Literal::Date(s)) => {
18646 Expression::Cast(Box::new(Cast {
18647 this: Expression::Literal(Literal::String(s)),
18648 to: DataType::Date,
18649 trailing_comments: vec![],
18650 double_colon_syntax: false,
18651 format: None,
18652 default: None,
18653 }))
18654 }
18655 Expression::Literal(Literal::String(ref _s)) => {
18656 // String literal that should be a date -> CAST('s' AS DATE)
18657 Expression::Cast(Box::new(Cast {
18658 this: expr,
18659 to: DataType::Date,
18660 trailing_comments: vec![],
18661 double_colon_syntax: false,
18662 format: None,
18663 default: None,
18664 }))
18665 }
18666 // Already a CAST or other expression -> leave as-is
18667 other => other,
18668 }
18669 }
18670
18671 /// Force CAST(expr AS DATE) for any expression (not just literals)
18672 /// Skips if the expression is already a CAST to DATE
18673 fn force_cast_date(expr: Expression) -> Expression {
18674 use crate::expressions::{Cast, DataType};
18675 // If it's already a CAST to DATE, don't double-wrap
18676 if let Expression::Cast(ref c) = expr {
18677 if matches!(c.to, DataType::Date) {
18678 return expr;
18679 }
18680 }
18681 Expression::Cast(Box::new(Cast {
18682 this: expr,
18683 to: DataType::Date,
18684 trailing_comments: vec![],
18685 double_colon_syntax: false,
18686 format: None,
18687 default: None,
18688 }))
18689 }
18690
18691 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
18692 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
18693 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
18694 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
18695
18696 fn ensure_to_date_preserved(expr: Expression) -> Expression {
18697 use crate::expressions::{Literal, Function};
18698 if matches!(expr, Expression::Literal(Literal::String(_))) {
18699 Expression::Function(Box::new(Function::new(Self::PRESERVED_TO_DATE.to_string(), vec![expr])))
18700 } else {
18701 expr
18702 }
18703 }
18704
18705 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
18706 fn try_cast_date(expr: Expression) -> Expression {
18707 use crate::expressions::{Cast, DataType};
18708 Expression::TryCast(Box::new(Cast {
18709 this: expr,
18710 to: DataType::Date,
18711 trailing_comments: vec![],
18712 double_colon_syntax: false,
18713 format: None,
18714 default: None,
18715 }))
18716 }
18717
18718 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
18719 fn double_cast_timestamp_date(expr: Expression) -> Expression {
18720 use crate::expressions::{Cast, DataType};
18721 let inner = Expression::Cast(Box::new(Cast {
18722 this: expr,
18723 to: DataType::Timestamp { timezone: false, precision: None },
18724 trailing_comments: vec![],
18725 double_colon_syntax: false,
18726 format: None,
18727 default: None,
18728 }));
18729 Expression::Cast(Box::new(Cast {
18730 this: inner,
18731 to: DataType::Date,
18732 trailing_comments: vec![],
18733 double_colon_syntax: false,
18734 format: None,
18735 default: None,
18736 }))
18737 }
18738
18739 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
18740 fn double_cast_datetime_date(expr: Expression) -> Expression {
18741 use crate::expressions::{Cast, DataType};
18742 let inner = Expression::Cast(Box::new(Cast {
18743 this: expr,
18744 to: DataType::Custom { name: "DATETIME".to_string() },
18745 trailing_comments: vec![],
18746 double_colon_syntax: false,
18747 format: None,
18748 default: None,
18749 }));
18750 Expression::Cast(Box::new(Cast {
18751 this: inner,
18752 to: DataType::Date,
18753 trailing_comments: vec![],
18754 double_colon_syntax: false,
18755 format: None,
18756 default: None,
18757 }))
18758 }
18759
18760 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
18761 fn double_cast_datetime2_date(expr: Expression) -> Expression {
18762 use crate::expressions::{Cast, DataType};
18763 let inner = Expression::Cast(Box::new(Cast {
18764 this: expr,
18765 to: DataType::Custom { name: "DATETIME2".to_string() },
18766 trailing_comments: vec![],
18767 double_colon_syntax: false,
18768 format: None,
18769 default: None,
18770 }));
18771 Expression::Cast(Box::new(Cast {
18772 this: inner,
18773 to: DataType::Date,
18774 trailing_comments: vec![],
18775 double_colon_syntax: false,
18776 format: None,
18777 default: None,
18778 }))
18779 }
18780
18781 /// Convert Hive/Java-style date format strings to C-style (strftime) format
18782 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
18783 fn hive_format_to_c_format(fmt: &str) -> String {
18784 let mut result = String::new();
18785 let chars: Vec<char> = fmt.chars().collect();
18786 let mut i = 0;
18787 while i < chars.len() {
18788 match chars[i] {
18789 'y' => {
18790 let mut count = 0;
18791 while i < chars.len() && chars[i] == 'y' { count += 1; i += 1; }
18792 if count >= 4 { result.push_str("%Y"); }
18793 else if count == 2 { result.push_str("%y"); }
18794 else { result.push_str("%Y"); }
18795 }
18796 'M' => {
18797 let mut count = 0;
18798 while i < chars.len() && chars[i] == 'M' { count += 1; i += 1; }
18799 if count >= 3 { result.push_str("%b"); }
18800 else if count == 2 { result.push_str("%m"); }
18801 else { result.push_str("%m"); }
18802 }
18803 'd' => {
18804 let mut _count = 0;
18805 while i < chars.len() && chars[i] == 'd' { _count += 1; i += 1; }
18806 result.push_str("%d");
18807 }
18808 'H' => {
18809 let mut _count = 0;
18810 while i < chars.len() && chars[i] == 'H' { _count += 1; i += 1; }
18811 result.push_str("%H");
18812 }
18813 'h' => {
18814 let mut _count = 0;
18815 while i < chars.len() && chars[i] == 'h' { _count += 1; i += 1; }
18816 result.push_str("%I");
18817 }
18818 'm' => {
18819 let mut _count = 0;
18820 while i < chars.len() && chars[i] == 'm' { _count += 1; i += 1; }
18821 result.push_str("%M");
18822 }
18823 's' => {
18824 let mut _count = 0;
18825 while i < chars.len() && chars[i] == 's' { _count += 1; i += 1; }
18826 result.push_str("%S");
18827 }
18828 'S' => {
18829 // Fractional seconds - skip
18830 while i < chars.len() && chars[i] == 'S' { i += 1; }
18831 result.push_str("%f");
18832 }
18833 'a' => {
18834 // AM/PM
18835 while i < chars.len() && chars[i] == 'a' { i += 1; }
18836 result.push_str("%p");
18837 }
18838 'E' => {
18839 let mut count = 0;
18840 while i < chars.len() && chars[i] == 'E' { count += 1; i += 1; }
18841 if count >= 4 { result.push_str("%A"); }
18842 else { result.push_str("%a"); }
18843 }
18844 '\'' => {
18845 // Quoted literal text - pass through the quotes and content
18846 result.push('\'');
18847 i += 1;
18848 while i < chars.len() && chars[i] != '\'' {
18849 result.push(chars[i]);
18850 i += 1;
18851 }
18852 if i < chars.len() { result.push('\''); i += 1; }
18853 }
18854 c => {
18855 result.push(c);
18856 i += 1;
18857 }
18858 }
18859 }
18860 result
18861 }
18862
18863 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
18864 fn hive_format_to_presto_format(fmt: &str) -> String {
18865 let c_fmt = Self::hive_format_to_c_format(fmt);
18866 // Presto uses %T for HH:MM:SS
18867 c_fmt.replace("%H:%M:%S", "%T")
18868 }
18869
18870 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
18871 fn ensure_cast_timestamp(expr: Expression) -> Expression {
18872 use crate::expressions::{Cast, DataType, Literal};
18873 match expr {
18874 Expression::Literal(Literal::Timestamp(s)) => {
18875 Expression::Cast(Box::new(Cast {
18876 this: Expression::Literal(Literal::String(s)),
18877 to: DataType::Timestamp { timezone: false, precision: None },
18878 trailing_comments: vec![],
18879 double_colon_syntax: false,
18880 format: None,
18881 default: None,
18882 }))
18883 }
18884 Expression::Literal(Literal::String(ref _s)) => {
18885 Expression::Cast(Box::new(Cast {
18886 this: expr,
18887 to: DataType::Timestamp { timezone: false, precision: None },
18888 trailing_comments: vec![],
18889 double_colon_syntax: false,
18890 format: None,
18891 default: None,
18892 }))
18893 }
18894 Expression::Literal(Literal::Datetime(s)) => {
18895 Expression::Cast(Box::new(Cast {
18896 this: Expression::Literal(Literal::String(s)),
18897 to: DataType::Timestamp { timezone: false, precision: None },
18898 trailing_comments: vec![],
18899 double_colon_syntax: false,
18900 format: None,
18901 default: None,
18902 }))
18903 }
18904 other => other,
18905 }
18906 }
18907
18908 /// Force CAST to TIMESTAMP for any expression (not just literals)
18909 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
18910 fn force_cast_timestamp(expr: Expression) -> Expression {
18911 use crate::expressions::{Cast, DataType};
18912 // Don't double-wrap if already a CAST to TIMESTAMP
18913 if let Expression::Cast(ref c) = expr {
18914 if matches!(c.to, DataType::Timestamp { .. }) {
18915 return expr;
18916 }
18917 }
18918 Expression::Cast(Box::new(Cast {
18919 this: expr,
18920 to: DataType::Timestamp { timezone: false, precision: None },
18921 trailing_comments: vec![],
18922 double_colon_syntax: false,
18923 format: None,
18924 default: None,
18925 }))
18926 }
18927
18928 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
18929 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
18930 use crate::expressions::{Cast, DataType, Literal};
18931 match expr {
18932 Expression::Literal(Literal::Timestamp(s)) => {
18933 Expression::Cast(Box::new(Cast {
18934 this: Expression::Literal(Literal::String(s)),
18935 to: DataType::Timestamp { timezone: true, precision: None },
18936 trailing_comments: vec![],
18937 double_colon_syntax: false,
18938 format: None,
18939 default: None,
18940 }))
18941 }
18942 Expression::Literal(Literal::String(ref _s)) => {
18943 Expression::Cast(Box::new(Cast {
18944 this: expr,
18945 to: DataType::Timestamp { timezone: true, precision: None },
18946 trailing_comments: vec![],
18947 double_colon_syntax: false,
18948 format: None,
18949 default: None,
18950 }))
18951 }
18952 Expression::Literal(Literal::Datetime(s)) => {
18953 Expression::Cast(Box::new(Cast {
18954 this: Expression::Literal(Literal::String(s)),
18955 to: DataType::Timestamp { timezone: true, precision: None },
18956 trailing_comments: vec![],
18957 double_colon_syntax: false,
18958 format: None,
18959 default: None,
18960 }))
18961 }
18962 other => other,
18963 }
18964 }
18965
18966 /// Ensure expression is CAST to DATETIME (for BigQuery)
18967 fn ensure_cast_datetime(expr: Expression) -> Expression {
18968 use crate::expressions::{Cast, DataType, Literal};
18969 match expr {
18970 Expression::Literal(Literal::String(ref _s)) => {
18971 Expression::Cast(Box::new(Cast {
18972 this: expr,
18973 to: DataType::Custom { name: "DATETIME".to_string() },
18974 trailing_comments: vec![],
18975 double_colon_syntax: false,
18976 format: None,
18977 default: None,
18978 }))
18979 }
18980 other => other,
18981 }
18982 }
18983
18984 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
18985 fn force_cast_datetime(expr: Expression) -> Expression {
18986 use crate::expressions::{Cast, DataType};
18987 if let Expression::Cast(ref c) = expr {
18988 if let DataType::Custom { ref name } = c.to {
18989 if name.eq_ignore_ascii_case("DATETIME") {
18990 return expr;
18991 }
18992 }
18993 }
18994 Expression::Cast(Box::new(Cast {
18995 this: expr,
18996 to: DataType::Custom { name: "DATETIME".to_string() },
18997 trailing_comments: vec![],
18998 double_colon_syntax: false,
18999 format: None,
19000 default: None,
19001 }))
19002 }
19003
19004 /// Ensure expression is CAST to DATETIME2 (for TSQL)
19005 fn ensure_cast_datetime2(expr: Expression) -> Expression {
19006 use crate::expressions::{Cast, DataType, Literal};
19007 match expr {
19008 Expression::Literal(Literal::String(ref _s)) => {
19009 Expression::Cast(Box::new(Cast {
19010 this: expr,
19011 to: DataType::Custom { name: "DATETIME2".to_string() },
19012 trailing_comments: vec![],
19013 double_colon_syntax: false,
19014 format: None,
19015 default: None,
19016 }))
19017 }
19018 other => other,
19019 }
19020 }
19021
19022 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
19023 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
19024 use crate::expressions::{Cast, DataType, Literal};
19025 match expr {
19026 Expression::Literal(Literal::Timestamp(s)) => {
19027 Expression::Cast(Box::new(Cast {
19028 this: Expression::Literal(Literal::String(s)),
19029 to: DataType::Timestamp { timezone: true, precision: None },
19030 trailing_comments: vec![],
19031 double_colon_syntax: false,
19032 format: None,
19033 default: None,
19034 }))
19035 }
19036 other => other,
19037 }
19038 }
19039
19040 /// Convert BigQuery format string to Snowflake format string
19041 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
19042 use crate::expressions::Literal;
19043 if let Expression::Literal(Literal::String(s)) = format_expr {
19044 let sf = s
19045 .replace("%Y", "yyyy")
19046 .replace("%m", "mm")
19047 .replace("%d", "DD")
19048 .replace("%H", "HH24")
19049 .replace("%M", "MI")
19050 .replace("%S", "SS")
19051 .replace("%b", "mon")
19052 .replace("%B", "Month")
19053 .replace("%e", "FMDD");
19054 Expression::Literal(Literal::String(sf))
19055 } else {
19056 format_expr.clone()
19057 }
19058 }
19059
19060 /// Convert BigQuery format string to DuckDB format string
19061 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
19062 use crate::expressions::Literal;
19063 if let Expression::Literal(Literal::String(s)) = format_expr {
19064 let duck = s
19065 .replace("%T", "%H:%M:%S")
19066 .replace("%F", "%Y-%m-%d")
19067 .replace("%D", "%m/%d/%y")
19068 .replace("%x", "%m/%d/%y")
19069 .replace("%c", "%a %b %-d %H:%M:%S %Y")
19070 .replace("%e", "%-d")
19071 .replace("%E6S", "%S.%f");
19072 Expression::Literal(Literal::String(duck))
19073 } else {
19074 format_expr.clone()
19075 }
19076 }
19077
19078 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
19079 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
19080 use crate::expressions::Literal;
19081 if let Expression::Literal(Literal::String(s)) = format_expr {
19082 // Replace format elements from longest to shortest to avoid partial matches
19083 let result = s
19084 .replace("YYYYMMDD", "%Y%m%d")
19085 .replace("YYYY", "%Y")
19086 .replace("YY", "%y")
19087 .replace("MONTH", "%B")
19088 .replace("MON", "%b")
19089 .replace("MM", "%m")
19090 .replace("DD", "%d")
19091 .replace("HH24", "%H")
19092 .replace("HH12", "%I")
19093 .replace("HH", "%I")
19094 .replace("MI", "%M")
19095 .replace("SSTZH", "%S%z")
19096 .replace("SS", "%S")
19097 .replace("TZH", "%z");
19098 Expression::Literal(Literal::String(result))
19099 } else {
19100 format_expr.clone()
19101 }
19102 }
19103
19104 /// Normalize BigQuery format strings for BQ->BQ output
19105 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
19106 use crate::expressions::Literal;
19107 if let Expression::Literal(Literal::String(s)) = format_expr {
19108 let norm = s
19109 .replace("%H:%M:%S", "%T")
19110 .replace("%x", "%D");
19111 Expression::Literal(Literal::String(norm))
19112 } else {
19113 format_expr.clone()
19114 }
19115 }
19116}
19117
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!("postgres".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("postgresql".parse::<DialectType>().unwrap(), DialectType::PostgreSQL);
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!("bigquery".parse::<DialectType>().unwrap(), DialectType::BigQuery);
    }

    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT 1", DialectType::PostgreSQL).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::MySQL).unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    #[test]
    fn test_get_path_duckdb() {
        // Snowflake colon-path access (j:fruit) and the explicit GET_PATH
        // spelling must both transpile without error. This test previously
        // only eprintln!'d the outputs and asserted nothing, so it could
        // never fail; it now checks each transpile yields exactly one
        // non-empty statement.
        let snowflake = Dialect::get(DialectType::Snowflake);
        let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";

        // Snowflake -> Snowflake round trip of the colon syntax.
        let result_sf_sf = snowflake.transpile_to(colon_sql, DialectType::Snowflake).unwrap();
        assert_eq!(result_sf_sf.len(), 1);
        assert!(!result_sf_sf[0].is_empty());

        // Snowflake -> DuckDB of the colon syntax.
        let result_sf_dk = snowflake.transpile_to(colon_sql, DialectType::DuckDB).unwrap();
        assert_eq!(result_sf_dk.len(), 1);
        assert!(!result_sf_dk[0].is_empty());

        // Explicit GET_PATH spelling of the same access.
        let result_gp = snowflake.transpile_to(
            "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
            DialectType::DuckDB,
        ).unwrap();
        assert_eq!(result_gp.len(), 1);
        assert!(!result_gp[0].is_empty());
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect.transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive.transpile_to("CAST(1 AS INT)", DialectType::Presto).unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax on both the
        // transpile_to path and the parse -> transform -> generate path.
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        assert!(result[0].contains("ARRAY<DATE>"), "transpile_to: Expected ARRAY<DATE>, got: {}", result[0]);

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        assert!(output.contains("ARRAY<DATE>"), "identity path: Expected ARRAY<DATE>, got: {}", output);
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect.transpile_to("DELETE FROM t WHERE a NOT BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect.transpile_to("SELECT * FROM t WHERE a BETWEEN b AND c", DialectType::StarRocks).unwrap();
        assert!(result[0].contains("BETWEEN"), "BETWEEN should be preserved in SELECT");
    }

    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        // Nested LTRIM(RTRIM(...)) must parse and transpile without error.
        let sf = Dialect::get(DialectType::Snowflake);
        let result = sf.transpile_to("SELECT LTRIM(RTRIM(col)) FROM t1", DialectType::DuckDB);
        assert!(result.is_ok(), "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}", result.err());
    }

    #[test]
    fn test_duckdb_count_if_parse() {
        // COUNT_IF must be accepted by the DuckDB dialect parser.
        let duck = Dialect::get(DialectType::DuckDB);
        let result = duck.transpile_to("COUNT_IF(x)", DialectType::DuckDB);
        assert!(result.is_ok(), "Expected successful parse of COUNT_IF(x), got error: {:?}", result.err());
    }

    #[test]
    fn test_tsql_cast_tinyint_parse() {
        // TSQL CAST(... AS TINYINT) must transpile to DuckDB without error.
        let tsql = Dialect::get(DialectType::TSQL);
        let result = tsql.transpile_to("CAST(X AS TINYINT)", DialectType::DuckDB);
        assert!(result.is_ok(), "Expected successful transpile, got error: {:?}", result.err());
    }

    #[test]
    fn test_pg_hash_bitwise_xor() {
        // PostgreSQL's # (bitwise XOR) operator round-trips unchanged.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("x # y", DialectType::PostgreSQL).unwrap();
        assert_eq!(result[0], "x # y");
    }

    #[test]
    fn test_pg_array_to_duckdb() {
        // PostgreSQL ARRAY[...] literals become DuckDB bracket literals while
        // the containment operator @> is preserved.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect.transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB).unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }
}