polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod athena;
23mod bigquery;
24mod clickhouse;
25mod cockroachdb;
26mod databricks;
27mod datafusion;
28mod doris;
29mod dremio;
30mod drill;
31mod druid;
32mod duckdb;
33mod dune;
34mod exasol;
35mod fabric;
36mod generic;
37mod hive;
38mod materialize;
39mod mysql;
40mod oracle;
41mod postgres;
42mod presto;
43mod redshift;
44mod risingwave;
45mod singlestore;
46mod snowflake;
47mod solr;
48mod spark;
49mod sqlite;
50mod starrocks;
51mod tableau;
52mod teradata;
53mod tidb;
54mod trino;
55mod tsql;
56
57pub use athena::AthenaDialect;
58pub use bigquery::BigQueryDialect;
59pub use clickhouse::ClickHouseDialect;
60pub use cockroachdb::CockroachDBDialect;
61pub use databricks::DatabricksDialect;
62pub use datafusion::DataFusionDialect;
63pub use doris::DorisDialect;
64pub use dremio::DremioDialect;
65pub use drill::DrillDialect;
66pub use druid::DruidDialect;
67pub use duckdb::DuckDBDialect;
68pub use dune::DuneDialect;
69pub use exasol::ExasolDialect;
70pub use fabric::FabricDialect;
71pub use generic::GenericDialect;
72pub use hive::HiveDialect;
73pub use materialize::MaterializeDialect;
74pub use mysql::MySQLDialect;
75pub use oracle::OracleDialect;
76pub use postgres::PostgresDialect;
77pub use presto::PrestoDialect;
78pub use redshift::RedshiftDialect;
79pub use risingwave::RisingWaveDialect;
80pub use singlestore::SingleStoreDialect;
81pub use snowflake::SnowflakeDialect;
82pub use solr::SolrDialect;
83pub use spark::SparkDialect;
84pub use sqlite::SQLiteDialect;
85pub use starrocks::StarRocksDialect;
86pub use tableau::TableauDialect;
87pub use teradata::TeradataDialect;
88pub use tidb::TiDBDialect;
89pub use trino::TrinoDialect;
90pub use tsql::TSQLDialect;
91
92use crate::error::Result;
93use crate::expressions::{Expression, FunctionBody};
94use crate::generator::{Generator, GeneratorConfig};
95use crate::parser::Parser;
96use crate::tokens::{Tokenizer, TokenizerConfig};
97use serde::{Deserialize, Serialize};
98use std::collections::HashMap;
99use std::sync::{Arc, LazyLock, RwLock};
100
101/// Enumeration of all supported SQL dialects.
102///
103/// Each variant corresponds to a specific SQL database engine or query language.
104/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
105/// and is used as the default when no dialect is specified.
106///
107/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
108/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
109#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
110#[serde(rename_all = "lowercase")]
111pub enum DialectType {
112 /// Standard SQL with no dialect-specific behavior (default).
113 Generic,
114 /// PostgreSQL -- advanced open-source relational database.
115 PostgreSQL,
116 /// MySQL -- widely-used open-source relational database (also accepts "mysql").
117 MySQL,
118 /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
119 BigQuery,
120 /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
121 Snowflake,
122 /// DuckDB -- in-process analytical database with modern SQL extensions.
123 DuckDB,
124 /// SQLite -- lightweight embedded relational database.
125 SQLite,
126 /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
127 Hive,
128 /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
129 Spark,
130 /// Trino -- distributed SQL query engine (formerly PrestoSQL).
131 Trino,
132 /// PrestoDB -- distributed SQL query engine for big data.
133 Presto,
134 /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
135 Redshift,
136 /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
137 TSQL,
138 /// Oracle Database -- commercial relational database with PL/SQL extensions.
139 Oracle,
140 /// ClickHouse -- column-oriented OLAP database for real-time analytics.
141 ClickHouse,
142 /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
143 Databricks,
144 /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
145 Athena,
146 /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
147 Teradata,
148 /// Apache Doris -- real-time analytical database (MySQL-compatible).
149 Doris,
150 /// StarRocks -- sub-second OLAP database (MySQL-compatible).
151 StarRocks,
152 /// Materialize -- streaming SQL database built on differential dataflow.
153 Materialize,
154 /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
155 RisingWave,
156 /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
157 SingleStore,
158 /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
159 CockroachDB,
160 /// TiDB -- distributed HTAP database with MySQL compatibility.
161 TiDB,
162 /// Apache Druid -- real-time analytics database.
163 Druid,
164 /// Apache Solr -- search platform with SQL interface.
165 Solr,
166 /// Tableau -- data visualization platform with its own SQL dialect.
167 Tableau,
168 /// Dune Analytics -- blockchain analytics SQL engine.
169 Dune,
170 /// Microsoft Fabric -- unified analytics platform (T-SQL based).
171 Fabric,
172 /// Apache Drill -- schema-free SQL query engine for big data.
173 Drill,
174 /// Dremio -- data lakehouse platform with Arrow-based query engine.
175 Dremio,
176 /// Exasol -- in-memory analytic database.
177 Exasol,
178 /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
179 DataFusion,
180}
181
182impl Default for DialectType {
183 fn default() -> Self {
184 DialectType::Generic
185 }
186}
187
188impl std::fmt::Display for DialectType {
189 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190 match self {
191 DialectType::Generic => write!(f, "generic"),
192 DialectType::PostgreSQL => write!(f, "postgresql"),
193 DialectType::MySQL => write!(f, "mysql"),
194 DialectType::BigQuery => write!(f, "bigquery"),
195 DialectType::Snowflake => write!(f, "snowflake"),
196 DialectType::DuckDB => write!(f, "duckdb"),
197 DialectType::SQLite => write!(f, "sqlite"),
198 DialectType::Hive => write!(f, "hive"),
199 DialectType::Spark => write!(f, "spark"),
200 DialectType::Trino => write!(f, "trino"),
201 DialectType::Presto => write!(f, "presto"),
202 DialectType::Redshift => write!(f, "redshift"),
203 DialectType::TSQL => write!(f, "tsql"),
204 DialectType::Oracle => write!(f, "oracle"),
205 DialectType::ClickHouse => write!(f, "clickhouse"),
206 DialectType::Databricks => write!(f, "databricks"),
207 DialectType::Athena => write!(f, "athena"),
208 DialectType::Teradata => write!(f, "teradata"),
209 DialectType::Doris => write!(f, "doris"),
210 DialectType::StarRocks => write!(f, "starrocks"),
211 DialectType::Materialize => write!(f, "materialize"),
212 DialectType::RisingWave => write!(f, "risingwave"),
213 DialectType::SingleStore => write!(f, "singlestore"),
214 DialectType::CockroachDB => write!(f, "cockroachdb"),
215 DialectType::TiDB => write!(f, "tidb"),
216 DialectType::Druid => write!(f, "druid"),
217 DialectType::Solr => write!(f, "solr"),
218 DialectType::Tableau => write!(f, "tableau"),
219 DialectType::Dune => write!(f, "dune"),
220 DialectType::Fabric => write!(f, "fabric"),
221 DialectType::Drill => write!(f, "drill"),
222 DialectType::Dremio => write!(f, "dremio"),
223 DialectType::Exasol => write!(f, "exasol"),
224 DialectType::DataFusion => write!(f, "datafusion"),
225 }
226 }
227}
228
229impl std::str::FromStr for DialectType {
230 type Err = crate::error::Error;
231
232 fn from_str(s: &str) -> Result<Self> {
233 match s.to_lowercase().as_str() {
234 "generic" | "" => Ok(DialectType::Generic),
235 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
236 "mysql" => Ok(DialectType::MySQL),
237 "bigquery" => Ok(DialectType::BigQuery),
238 "snowflake" => Ok(DialectType::Snowflake),
239 "duckdb" => Ok(DialectType::DuckDB),
240 "sqlite" => Ok(DialectType::SQLite),
241 "hive" => Ok(DialectType::Hive),
242 "spark" | "spark2" => Ok(DialectType::Spark),
243 "trino" => Ok(DialectType::Trino),
244 "presto" => Ok(DialectType::Presto),
245 "redshift" => Ok(DialectType::Redshift),
246 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
247 "oracle" => Ok(DialectType::Oracle),
248 "clickhouse" => Ok(DialectType::ClickHouse),
249 "databricks" => Ok(DialectType::Databricks),
250 "athena" => Ok(DialectType::Athena),
251 "teradata" => Ok(DialectType::Teradata),
252 "doris" => Ok(DialectType::Doris),
253 "starrocks" => Ok(DialectType::StarRocks),
254 "materialize" => Ok(DialectType::Materialize),
255 "risingwave" => Ok(DialectType::RisingWave),
256 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
257 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
258 "tidb" => Ok(DialectType::TiDB),
259 "druid" => Ok(DialectType::Druid),
260 "solr" => Ok(DialectType::Solr),
261 "tableau" => Ok(DialectType::Tableau),
262 "dune" => Ok(DialectType::Dune),
263 "fabric" => Ok(DialectType::Fabric),
264 "drill" => Ok(DialectType::Drill),
265 "dremio" => Ok(DialectType::Dremio),
266 "exasol" => Ok(DialectType::Exasol),
267 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
268 _ => Err(crate::error::Error::parse(format!(
269 "Unknown dialect: {}",
270 s
271 ))),
272 }
273 }
274}
275
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// [`TokenizerConfig`], i.e., standard-SQL lexing.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// generic [`GeneratorConfig`], i.e., standard-SQL output.
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`], which traverses
    /// bottom-up — so when this method sees a node, its children have already been transformed.
    /// Return the expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Invoked once with the root expression, ahead of [`transform_recursive`]. Override this
    /// to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
338
339/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
340/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
341///
342/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
343/// and then nested element/field types are recursed into. This ensures that dialect-level
344/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
345fn transform_data_type_recursive<F>(
346 dt: crate::expressions::DataType,
347 transform_fn: &F,
348) -> Result<crate::expressions::DataType>
349where
350 F: Fn(Expression) -> Result<Expression>,
351{
352 use crate::expressions::DataType;
353 // First, transform the outermost type through the expression system
354 let dt_expr = transform_fn(Expression::DataType(dt))?;
355 let dt = match dt_expr {
356 Expression::DataType(d) => d,
357 _ => {
358 return Ok(match dt_expr {
359 _ => DataType::Custom {
360 name: "UNKNOWN".to_string(),
361 },
362 })
363 }
364 };
365 // Then recurse into nested types
366 match dt {
367 DataType::Array {
368 element_type,
369 dimension,
370 } => {
371 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
372 Ok(DataType::Array {
373 element_type: Box::new(inner),
374 dimension,
375 })
376 }
377 DataType::List { element_type } => {
378 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
379 Ok(DataType::List {
380 element_type: Box::new(inner),
381 })
382 }
383 DataType::Struct { fields, nested } => {
384 let mut new_fields = Vec::new();
385 for mut field in fields {
386 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
387 new_fields.push(field);
388 }
389 Ok(DataType::Struct {
390 fields: new_fields,
391 nested,
392 })
393 }
394 DataType::Map {
395 key_type,
396 value_type,
397 } => {
398 let k = transform_data_type_recursive(*key_type, transform_fn)?;
399 let v = transform_data_type_recursive(*value_type, transform_fn)?;
400 Ok(DataType::Map {
401 key_type: Box::new(k),
402 value_type: Box::new(v),
403 })
404 }
405 other => Ok(other),
406 }
407}
408
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
fn duckdb_to_presto_format(fmt: &str) -> String {
    // (DuckDB pattern, sentinel, Presto replacement). Multi-character patterns are
    // swapped out for sentinels first so the single-character specifier rewrites
    // below cannot partially mangle them; sentinels use \x01, which never appears
    // in a format string. `%Y-%m-%d` is protected so its pieces survive intact.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];
    let mut out = fmt.to_string();
    for (pattern, sentinel, _) in PROTECTED {
        out = out.replace(pattern, sentinel);
    }
    // Single-character specifiers that differ between the two dialects.
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");
    // Swap the sentinels back in as their Presto equivalents.
    for (_, sentinel, presto) in PROTECTED {
        out = out.replace(sentinel, presto);
    }
    out
}
433
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Replacements run in this fixed order: the full datetime pattern collapses
    // to "%F %T" before its date/time components are rewritten individually.
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .iter()
    .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
445
446/// Applies a transform function bottom-up through an entire expression tree.
447///
448/// This is the core tree-rewriting engine used by the dialect system. It performs
449/// a post-order (children-first) traversal: for each node, all children are recursively
450/// transformed before the node itself is passed to `transform_fn`. This bottom-up
451/// strategy means that when `transform_fn` sees a node, its children have already
452/// been rewritten, which simplifies pattern matching on sub-expressions.
453///
454/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
455/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
456/// function calls, CASE expressions, date/time functions, and more.
457///
458/// # Arguments
459///
460/// * `expr` - The root expression to transform (consumed).
461/// * `transform_fn` - A closure that receives each expression node (after its children
462/// have been transformed) and returns a possibly-rewritten expression.
463///
464/// # Errors
465///
466/// Returns an error if `transform_fn` returns an error for any node.
467pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
468where
469 F: Fn(Expression) -> Result<Expression>,
470{
471 use crate::expressions::BinaryOp;
472
473 // Helper macro to transform binary ops with Box<BinaryOp>
474 macro_rules! transform_binary {
475 ($variant:ident, $op:expr) => {{
476 let left = transform_recursive($op.left, transform_fn)?;
477 let right = transform_recursive($op.right, transform_fn)?;
478 Expression::$variant(Box::new(BinaryOp {
479 left,
480 right,
481 left_comments: $op.left_comments,
482 operator_comments: $op.operator_comments,
483 trailing_comments: $op.trailing_comments,
484 }))
485 }};
486 }
487
488 // First recursively transform children, then apply the transform function
489 let expr = match expr {
490 Expression::Select(mut select) => {
491 select.expressions = select
492 .expressions
493 .into_iter()
494 .map(|e| transform_recursive(e, transform_fn))
495 .collect::<Result<Vec<_>>>()?;
496
497 // Transform FROM clause
498 if let Some(mut from) = select.from.take() {
499 from.expressions = from
500 .expressions
501 .into_iter()
502 .map(|e| transform_recursive(e, transform_fn))
503 .collect::<Result<Vec<_>>>()?;
504 select.from = Some(from);
505 }
506
507 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
508 select.joins = select
509 .joins
510 .into_iter()
511 .map(|mut join| {
512 join.this = transform_recursive(join.this, transform_fn)?;
513 if let Some(on) = join.on.take() {
514 join.on = Some(transform_recursive(on, transform_fn)?);
515 }
516 // Wrap join in Expression::Join to allow transform_fn to transform it
517 match transform_fn(Expression::Join(Box::new(join)))? {
518 Expression::Join(j) => Ok(*j),
519 _ => Err(crate::error::Error::parse(
520 "Join transformation returned non-join expression",
521 )),
522 }
523 })
524 .collect::<Result<Vec<_>>>()?;
525
526 // Transform LATERAL VIEW expressions (Hive/Spark)
527 select.lateral_views = select
528 .lateral_views
529 .into_iter()
530 .map(|mut lv| {
531 lv.this = transform_recursive(lv.this, transform_fn)?;
532 Ok(lv)
533 })
534 .collect::<Result<Vec<_>>>()?;
535
536 // Transform WHERE clause
537 if let Some(mut where_clause) = select.where_clause.take() {
538 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
539 select.where_clause = Some(where_clause);
540 }
541
542 // Transform GROUP BY
543 if let Some(mut group_by) = select.group_by.take() {
544 group_by.expressions = group_by
545 .expressions
546 .into_iter()
547 .map(|e| transform_recursive(e, transform_fn))
548 .collect::<Result<Vec<_>>>()?;
549 select.group_by = Some(group_by);
550 }
551
552 // Transform HAVING
553 if let Some(mut having) = select.having.take() {
554 having.this = transform_recursive(having.this, transform_fn)?;
555 select.having = Some(having);
556 }
557
558 // Transform WITH (CTEs)
559 if let Some(mut with) = select.with.take() {
560 with.ctes = with
561 .ctes
562 .into_iter()
563 .map(|mut cte| {
564 let original = cte.this.clone();
565 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
566 cte
567 })
568 .collect();
569 select.with = Some(with);
570 }
571
572 // Transform ORDER BY
573 if let Some(mut order) = select.order_by.take() {
574 order.expressions = order
575 .expressions
576 .into_iter()
577 .map(|o| {
578 let mut o = o;
579 let original = o.this.clone();
580 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
581 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
582 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
583 Ok(Expression::Ordered(transformed)) => *transformed,
584 Ok(_) | Err(_) => o,
585 }
586 })
587 .collect();
588 select.order_by = Some(order);
589 }
590
591 // Transform WINDOW clause order_by
592 if let Some(ref mut windows) = select.windows {
593 for nw in windows.iter_mut() {
594 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
595 .into_iter()
596 .map(|o| {
597 let mut o = o;
598 let original = o.this.clone();
599 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
600 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
601 Ok(Expression::Ordered(transformed)) => *transformed,
602 Ok(_) | Err(_) => o,
603 }
604 })
605 .collect();
606 }
607 }
608
609 // Transform QUALIFY
610 if let Some(mut qual) = select.qualify.take() {
611 qual.this = transform_recursive(qual.this, transform_fn)?;
612 select.qualify = Some(qual);
613 }
614
615 Expression::Select(select)
616 }
617 Expression::Function(mut f) => {
618 f.args = f
619 .args
620 .into_iter()
621 .map(|e| transform_recursive(e, transform_fn))
622 .collect::<Result<Vec<_>>>()?;
623 Expression::Function(f)
624 }
625 Expression::AggregateFunction(mut f) => {
626 f.args = f
627 .args
628 .into_iter()
629 .map(|e| transform_recursive(e, transform_fn))
630 .collect::<Result<Vec<_>>>()?;
631 if let Some(filter) = f.filter {
632 f.filter = Some(transform_recursive(filter, transform_fn)?);
633 }
634 Expression::AggregateFunction(f)
635 }
636 Expression::WindowFunction(mut wf) => {
637 wf.this = transform_recursive(wf.this, transform_fn)?;
638 wf.over.partition_by = wf
639 .over
640 .partition_by
641 .into_iter()
642 .map(|e| transform_recursive(e, transform_fn))
643 .collect::<Result<Vec<_>>>()?;
644 // Transform order_by items through Expression::Ordered wrapper
645 wf.over.order_by = wf
646 .over
647 .order_by
648 .into_iter()
649 .map(|o| {
650 let mut o = o;
651 o.this = transform_recursive(o.this, transform_fn)?;
652 match transform_fn(Expression::Ordered(Box::new(o)))? {
653 Expression::Ordered(transformed) => Ok(*transformed),
654 _ => Err(crate::error::Error::parse(
655 "Ordered transformation returned non-Ordered expression",
656 )),
657 }
658 })
659 .collect::<Result<Vec<_>>>()?;
660 Expression::WindowFunction(wf)
661 }
662 Expression::Alias(mut a) => {
663 a.this = transform_recursive(a.this, transform_fn)?;
664 Expression::Alias(a)
665 }
666 Expression::Cast(mut c) => {
667 c.this = transform_recursive(c.this, transform_fn)?;
668 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
669 c.to = transform_data_type_recursive(c.to, transform_fn)?;
670 Expression::Cast(c)
671 }
672 Expression::And(op) => transform_binary!(And, *op),
673 Expression::Or(op) => transform_binary!(Or, *op),
674 Expression::Add(op) => transform_binary!(Add, *op),
675 Expression::Sub(op) => transform_binary!(Sub, *op),
676 Expression::Mul(op) => transform_binary!(Mul, *op),
677 Expression::Div(op) => transform_binary!(Div, *op),
678 Expression::Eq(op) => transform_binary!(Eq, *op),
679 Expression::Lt(op) => transform_binary!(Lt, *op),
680 Expression::Gt(op) => transform_binary!(Gt, *op),
681 Expression::Paren(mut p) => {
682 p.this = transform_recursive(p.this, transform_fn)?;
683 Expression::Paren(p)
684 }
685 Expression::Coalesce(mut f) => {
686 f.expressions = f
687 .expressions
688 .into_iter()
689 .map(|e| transform_recursive(e, transform_fn))
690 .collect::<Result<Vec<_>>>()?;
691 Expression::Coalesce(f)
692 }
693 Expression::IfNull(mut f) => {
694 f.this = transform_recursive(f.this, transform_fn)?;
695 f.expression = transform_recursive(f.expression, transform_fn)?;
696 Expression::IfNull(f)
697 }
698 Expression::Nvl(mut f) => {
699 f.this = transform_recursive(f.this, transform_fn)?;
700 f.expression = transform_recursive(f.expression, transform_fn)?;
701 Expression::Nvl(f)
702 }
703 Expression::In(mut i) => {
704 i.this = transform_recursive(i.this, transform_fn)?;
705 i.expressions = i
706 .expressions
707 .into_iter()
708 .map(|e| transform_recursive(e, transform_fn))
709 .collect::<Result<Vec<_>>>()?;
710 if let Some(query) = i.query {
711 i.query = Some(transform_recursive(query, transform_fn)?);
712 }
713 Expression::In(i)
714 }
715 Expression::Not(mut n) => {
716 n.this = transform_recursive(n.this, transform_fn)?;
717 Expression::Not(n)
718 }
719 Expression::ArraySlice(mut s) => {
720 s.this = transform_recursive(s.this, transform_fn)?;
721 if let Some(start) = s.start {
722 s.start = Some(transform_recursive(start, transform_fn)?);
723 }
724 if let Some(end) = s.end {
725 s.end = Some(transform_recursive(end, transform_fn)?);
726 }
727 Expression::ArraySlice(s)
728 }
729 Expression::Subscript(mut s) => {
730 s.this = transform_recursive(s.this, transform_fn)?;
731 s.index = transform_recursive(s.index, transform_fn)?;
732 Expression::Subscript(s)
733 }
734 Expression::Array(mut a) => {
735 a.expressions = a
736 .expressions
737 .into_iter()
738 .map(|e| transform_recursive(e, transform_fn))
739 .collect::<Result<Vec<_>>>()?;
740 Expression::Array(a)
741 }
742 Expression::Struct(mut s) => {
743 let mut new_fields = Vec::new();
744 for (name, expr) in s.fields {
745 let transformed = transform_recursive(expr, transform_fn)?;
746 new_fields.push((name, transformed));
747 }
748 s.fields = new_fields;
749 Expression::Struct(s)
750 }
751 Expression::NamedArgument(mut na) => {
752 na.value = transform_recursive(na.value, transform_fn)?;
753 Expression::NamedArgument(na)
754 }
755 Expression::MapFunc(mut m) => {
756 m.keys = m
757 .keys
758 .into_iter()
759 .map(|e| transform_recursive(e, transform_fn))
760 .collect::<Result<Vec<_>>>()?;
761 m.values = m
762 .values
763 .into_iter()
764 .map(|e| transform_recursive(e, transform_fn))
765 .collect::<Result<Vec<_>>>()?;
766 Expression::MapFunc(m)
767 }
768 Expression::ArrayFunc(mut a) => {
769 a.expressions = a
770 .expressions
771 .into_iter()
772 .map(|e| transform_recursive(e, transform_fn))
773 .collect::<Result<Vec<_>>>()?;
774 Expression::ArrayFunc(a)
775 }
776 Expression::Lambda(mut l) => {
777 l.body = transform_recursive(l.body, transform_fn)?;
778 Expression::Lambda(l)
779 }
780 Expression::JsonExtract(mut f) => {
781 f.this = transform_recursive(f.this, transform_fn)?;
782 f.path = transform_recursive(f.path, transform_fn)?;
783 Expression::JsonExtract(f)
784 }
785 Expression::JsonExtractScalar(mut f) => {
786 f.this = transform_recursive(f.this, transform_fn)?;
787 f.path = transform_recursive(f.path, transform_fn)?;
788 Expression::JsonExtractScalar(f)
789 }
790
791 // ===== UnaryFunc-based expressions =====
792 // These all have a single `this: Expression` child
793 Expression::Length(mut f) => {
794 f.this = transform_recursive(f.this, transform_fn)?;
795 Expression::Length(f)
796 }
797 Expression::Upper(mut f) => {
798 f.this = transform_recursive(f.this, transform_fn)?;
799 Expression::Upper(f)
800 }
801 Expression::Lower(mut f) => {
802 f.this = transform_recursive(f.this, transform_fn)?;
803 Expression::Lower(f)
804 }
805 Expression::LTrim(mut f) => {
806 f.this = transform_recursive(f.this, transform_fn)?;
807 Expression::LTrim(f)
808 }
809 Expression::RTrim(mut f) => {
810 f.this = transform_recursive(f.this, transform_fn)?;
811 Expression::RTrim(f)
812 }
813 Expression::Reverse(mut f) => {
814 f.this = transform_recursive(f.this, transform_fn)?;
815 Expression::Reverse(f)
816 }
817 Expression::Abs(mut f) => {
818 f.this = transform_recursive(f.this, transform_fn)?;
819 Expression::Abs(f)
820 }
821 Expression::Ceil(mut f) => {
822 f.this = transform_recursive(f.this, transform_fn)?;
823 Expression::Ceil(f)
824 }
825 Expression::Floor(mut f) => {
826 f.this = transform_recursive(f.this, transform_fn)?;
827 Expression::Floor(f)
828 }
829 Expression::Sign(mut f) => {
830 f.this = transform_recursive(f.this, transform_fn)?;
831 Expression::Sign(f)
832 }
833 Expression::Sqrt(mut f) => {
834 f.this = transform_recursive(f.this, transform_fn)?;
835 Expression::Sqrt(f)
836 }
837 Expression::Cbrt(mut f) => {
838 f.this = transform_recursive(f.this, transform_fn)?;
839 Expression::Cbrt(f)
840 }
841 Expression::Ln(mut f) => {
842 f.this = transform_recursive(f.this, transform_fn)?;
843 Expression::Ln(f)
844 }
845 Expression::Log(mut f) => {
846 f.this = transform_recursive(f.this, transform_fn)?;
847 if let Some(base) = f.base {
848 f.base = Some(transform_recursive(base, transform_fn)?);
849 }
850 Expression::Log(f)
851 }
852 Expression::Exp(mut f) => {
853 f.this = transform_recursive(f.this, transform_fn)?;
854 Expression::Exp(f)
855 }
856 Expression::Date(mut f) => {
857 f.this = transform_recursive(f.this, transform_fn)?;
858 Expression::Date(f)
859 }
860 Expression::Stddev(mut f) => {
861 f.this = transform_recursive(f.this, transform_fn)?;
862 Expression::Stddev(f)
863 }
864 Expression::Variance(mut f) => {
865 f.this = transform_recursive(f.this, transform_fn)?;
866 Expression::Variance(f)
867 }
868
869 // ===== BinaryFunc-based expressions =====
870 Expression::ModFunc(mut f) => {
871 f.this = transform_recursive(f.this, transform_fn)?;
872 f.expression = transform_recursive(f.expression, transform_fn)?;
873 Expression::ModFunc(f)
874 }
875 Expression::Power(mut f) => {
876 f.this = transform_recursive(f.this, transform_fn)?;
877 f.expression = transform_recursive(f.expression, transform_fn)?;
878 Expression::Power(f)
879 }
880 Expression::MapFromArrays(mut f) => {
881 f.this = transform_recursive(f.this, transform_fn)?;
882 f.expression = transform_recursive(f.expression, transform_fn)?;
883 Expression::MapFromArrays(f)
884 }
885 Expression::ElementAt(mut f) => {
886 f.this = transform_recursive(f.this, transform_fn)?;
887 f.expression = transform_recursive(f.expression, transform_fn)?;
888 Expression::ElementAt(f)
889 }
890 Expression::MapContainsKey(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 f.expression = transform_recursive(f.expression, transform_fn)?;
893 Expression::MapContainsKey(f)
894 }
895 Expression::Left(mut f) => {
896 f.this = transform_recursive(f.this, transform_fn)?;
897 f.length = transform_recursive(f.length, transform_fn)?;
898 Expression::Left(f)
899 }
900 Expression::Right(mut f) => {
901 f.this = transform_recursive(f.this, transform_fn)?;
902 f.length = transform_recursive(f.length, transform_fn)?;
903 Expression::Right(f)
904 }
905 Expression::Repeat(mut f) => {
906 f.this = transform_recursive(f.this, transform_fn)?;
907 f.times = transform_recursive(f.times, transform_fn)?;
908 Expression::Repeat(f)
909 }
910
911 // ===== Complex function expressions =====
912 Expression::Substring(mut f) => {
913 f.this = transform_recursive(f.this, transform_fn)?;
914 f.start = transform_recursive(f.start, transform_fn)?;
915 if let Some(len) = f.length {
916 f.length = Some(transform_recursive(len, transform_fn)?);
917 }
918 Expression::Substring(f)
919 }
920 Expression::Replace(mut f) => {
921 f.this = transform_recursive(f.this, transform_fn)?;
922 f.old = transform_recursive(f.old, transform_fn)?;
923 f.new = transform_recursive(f.new, transform_fn)?;
924 Expression::Replace(f)
925 }
926 Expression::ConcatWs(mut f) => {
927 f.separator = transform_recursive(f.separator, transform_fn)?;
928 f.expressions = f
929 .expressions
930 .into_iter()
931 .map(|e| transform_recursive(e, transform_fn))
932 .collect::<Result<Vec<_>>>()?;
933 Expression::ConcatWs(f)
934 }
935 Expression::Trim(mut f) => {
936 f.this = transform_recursive(f.this, transform_fn)?;
937 if let Some(chars) = f.characters {
938 f.characters = Some(transform_recursive(chars, transform_fn)?);
939 }
940 Expression::Trim(f)
941 }
942 Expression::Split(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
945 Expression::Split(f)
946 }
947 Expression::Lpad(mut f) => {
948 f.this = transform_recursive(f.this, transform_fn)?;
949 f.length = transform_recursive(f.length, transform_fn)?;
950 if let Some(fill) = f.fill {
951 f.fill = Some(transform_recursive(fill, transform_fn)?);
952 }
953 Expression::Lpad(f)
954 }
955 Expression::Rpad(mut f) => {
956 f.this = transform_recursive(f.this, transform_fn)?;
957 f.length = transform_recursive(f.length, transform_fn)?;
958 if let Some(fill) = f.fill {
959 f.fill = Some(transform_recursive(fill, transform_fn)?);
960 }
961 Expression::Rpad(f)
962 }
963
964 // ===== Conditional expressions =====
965 Expression::Case(mut c) => {
966 if let Some(operand) = c.operand {
967 c.operand = Some(transform_recursive(operand, transform_fn)?);
968 }
969 c.whens = c
970 .whens
971 .into_iter()
972 .map(|(cond, then)| {
973 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
974 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
975 (new_cond, new_then)
976 })
977 .collect();
978 if let Some(else_expr) = c.else_ {
979 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
980 }
981 Expression::Case(c)
982 }
983 Expression::IfFunc(mut f) => {
984 f.condition = transform_recursive(f.condition, transform_fn)?;
985 f.true_value = transform_recursive(f.true_value, transform_fn)?;
986 if let Some(false_val) = f.false_value {
987 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
988 }
989 Expression::IfFunc(f)
990 }
991
992 // ===== Date/Time expressions =====
993 Expression::DateAdd(mut f) => {
994 f.this = transform_recursive(f.this, transform_fn)?;
995 f.interval = transform_recursive(f.interval, transform_fn)?;
996 Expression::DateAdd(f)
997 }
998 Expression::DateSub(mut f) => {
999 f.this = transform_recursive(f.this, transform_fn)?;
1000 f.interval = transform_recursive(f.interval, transform_fn)?;
1001 Expression::DateSub(f)
1002 }
1003 Expression::DateDiff(mut f) => {
1004 f.this = transform_recursive(f.this, transform_fn)?;
1005 f.expression = transform_recursive(f.expression, transform_fn)?;
1006 Expression::DateDiff(f)
1007 }
1008 Expression::DateTrunc(mut f) => {
1009 f.this = transform_recursive(f.this, transform_fn)?;
1010 Expression::DateTrunc(f)
1011 }
1012 Expression::Extract(mut f) => {
1013 f.this = transform_recursive(f.this, transform_fn)?;
1014 Expression::Extract(f)
1015 }
1016
1017 // ===== JSON expressions =====
1018 Expression::JsonObject(mut f) => {
1019 f.pairs = f
1020 .pairs
1021 .into_iter()
1022 .map(|(k, v)| {
1023 let new_k = transform_recursive(k, transform_fn)?;
1024 let new_v = transform_recursive(v, transform_fn)?;
1025 Ok((new_k, new_v))
1026 })
1027 .collect::<Result<Vec<_>>>()?;
1028 Expression::JsonObject(f)
1029 }
1030
1031 // ===== Subquery expressions =====
1032 Expression::Subquery(mut s) => {
1033 s.this = transform_recursive(s.this, transform_fn)?;
1034 Expression::Subquery(s)
1035 }
1036 Expression::Exists(mut e) => {
1037 e.this = transform_recursive(e.this, transform_fn)?;
1038 Expression::Exists(e)
1039 }
1040
1041 // ===== Set operations =====
1042 Expression::Union(mut u) => {
1043 u.left = transform_recursive(u.left, transform_fn)?;
1044 u.right = transform_recursive(u.right, transform_fn)?;
1045 Expression::Union(u)
1046 }
1047 Expression::Intersect(mut i) => {
1048 i.left = transform_recursive(i.left, transform_fn)?;
1049 i.right = transform_recursive(i.right, transform_fn)?;
1050 Expression::Intersect(i)
1051 }
1052 Expression::Except(mut e) => {
1053 e.left = transform_recursive(e.left, transform_fn)?;
1054 e.right = transform_recursive(e.right, transform_fn)?;
1055 Expression::Except(e)
1056 }
1057
1058 // ===== DML expressions =====
1059 Expression::Insert(mut ins) => {
1060 // Transform VALUES clause expressions
1061 let mut new_values = Vec::new();
1062 for row in ins.values {
1063 let mut new_row = Vec::new();
1064 for e in row {
1065 new_row.push(transform_recursive(e, transform_fn)?);
1066 }
1067 new_values.push(new_row);
1068 }
1069 ins.values = new_values;
1070
1071 // Transform query (for INSERT ... SELECT)
1072 if let Some(query) = ins.query {
1073 ins.query = Some(transform_recursive(query, transform_fn)?);
1074 }
1075
1076 // Transform RETURNING clause
1077 let mut new_returning = Vec::new();
1078 for e in ins.returning {
1079 new_returning.push(transform_recursive(e, transform_fn)?);
1080 }
1081 ins.returning = new_returning;
1082
1083 // Transform ON CONFLICT clause
1084 if let Some(on_conflict) = ins.on_conflict {
1085 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1086 }
1087
1088 Expression::Insert(ins)
1089 }
1090 Expression::Update(mut upd) => {
1091 upd.set = upd
1092 .set
1093 .into_iter()
1094 .map(|(id, val)| {
1095 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1096 (id, new_val)
1097 })
1098 .collect();
1099 if let Some(mut where_clause) = upd.where_clause.take() {
1100 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1101 upd.where_clause = Some(where_clause);
1102 }
1103 Expression::Update(upd)
1104 }
1105 Expression::Delete(mut del) => {
1106 if let Some(mut where_clause) = del.where_clause.take() {
1107 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1108 del.where_clause = Some(where_clause);
1109 }
1110 Expression::Delete(del)
1111 }
1112
1113 // ===== CTE expressions =====
1114 Expression::With(mut w) => {
1115 w.ctes = w
1116 .ctes
1117 .into_iter()
1118 .map(|mut cte| {
1119 let original = cte.this.clone();
1120 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1121 cte
1122 })
1123 .collect();
1124 Expression::With(w)
1125 }
1126 Expression::Cte(mut c) => {
1127 c.this = transform_recursive(c.this, transform_fn)?;
1128 Expression::Cte(c)
1129 }
1130
1131 // ===== Order expressions =====
1132 Expression::Ordered(mut o) => {
1133 o.this = transform_recursive(o.this, transform_fn)?;
1134 Expression::Ordered(o)
1135 }
1136
1137 // ===== Negation =====
1138 Expression::Neg(mut n) => {
1139 n.this = transform_recursive(n.this, transform_fn)?;
1140 Expression::Neg(n)
1141 }
1142
1143 // ===== Between =====
1144 Expression::Between(mut b) => {
1145 b.this = transform_recursive(b.this, transform_fn)?;
1146 b.low = transform_recursive(b.low, transform_fn)?;
1147 b.high = transform_recursive(b.high, transform_fn)?;
1148 Expression::Between(b)
1149 }
1150
1151 // ===== Like expressions =====
1152 Expression::Like(mut l) => {
1153 l.left = transform_recursive(l.left, transform_fn)?;
1154 l.right = transform_recursive(l.right, transform_fn)?;
1155 Expression::Like(l)
1156 }
1157 Expression::ILike(mut l) => {
1158 l.left = transform_recursive(l.left, transform_fn)?;
1159 l.right = transform_recursive(l.right, transform_fn)?;
1160 Expression::ILike(l)
1161 }
1162
1163 // ===== Additional binary ops not covered by macro =====
1164 Expression::Neq(op) => transform_binary!(Neq, *op),
1165 Expression::Lte(op) => transform_binary!(Lte, *op),
1166 Expression::Gte(op) => transform_binary!(Gte, *op),
1167 Expression::Mod(op) => transform_binary!(Mod, *op),
1168 Expression::Concat(op) => transform_binary!(Concat, *op),
1169 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1170 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1171 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1172 Expression::Is(op) => transform_binary!(Is, *op),
1173
1174 // ===== TryCast / SafeCast =====
1175 Expression::TryCast(mut c) => {
1176 c.this = transform_recursive(c.this, transform_fn)?;
1177 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1178 Expression::TryCast(c)
1179 }
1180 Expression::SafeCast(mut c) => {
1181 c.this = transform_recursive(c.this, transform_fn)?;
1182 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1183 Expression::SafeCast(c)
1184 }
1185
1186 // ===== Misc =====
1187 Expression::Unnest(mut f) => {
1188 f.this = transform_recursive(f.this, transform_fn)?;
1189 f.expressions = f
1190 .expressions
1191 .into_iter()
1192 .map(|e| transform_recursive(e, transform_fn))
1193 .collect::<Result<Vec<_>>>()?;
1194 Expression::Unnest(f)
1195 }
1196 Expression::Explode(mut f) => {
1197 f.this = transform_recursive(f.this, transform_fn)?;
1198 Expression::Explode(f)
1199 }
1200 Expression::GroupConcat(mut f) => {
1201 f.this = transform_recursive(f.this, transform_fn)?;
1202 Expression::GroupConcat(f)
1203 }
1204 Expression::StringAgg(mut f) => {
1205 f.this = transform_recursive(f.this, transform_fn)?;
1206 Expression::StringAgg(f)
1207 }
1208 Expression::ListAgg(mut f) => {
1209 f.this = transform_recursive(f.this, transform_fn)?;
1210 Expression::ListAgg(f)
1211 }
1212 Expression::ArrayAgg(mut f) => {
1213 f.this = transform_recursive(f.this, transform_fn)?;
1214 Expression::ArrayAgg(f)
1215 }
1216 Expression::ParseJson(mut f) => {
1217 f.this = transform_recursive(f.this, transform_fn)?;
1218 Expression::ParseJson(f)
1219 }
1220 Expression::ToJson(mut f) => {
1221 f.this = transform_recursive(f.this, transform_fn)?;
1222 Expression::ToJson(f)
1223 }
1224 Expression::JSONExtract(mut e) => {
1225 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1226 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1227 Expression::JSONExtract(e)
1228 }
1229 Expression::JSONExtractScalar(mut e) => {
1230 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1231 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1232 Expression::JSONExtractScalar(e)
1233 }
1234
1235 // StrToTime: recurse into this
1236 Expression::StrToTime(mut e) => {
1237 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1238 Expression::StrToTime(e)
1239 }
1240
1241 // UnixToTime: recurse into this
1242 Expression::UnixToTime(mut e) => {
1243 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1244 Expression::UnixToTime(e)
1245 }
1246
1247 // CreateTable: recurse into column defaults, on_update expressions, and data types
1248 Expression::CreateTable(mut ct) => {
1249 for col in &mut ct.columns {
1250 if let Some(default_expr) = col.default.take() {
1251 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1252 }
1253 if let Some(on_update_expr) = col.on_update.take() {
1254 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1255 }
1256 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1257 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1258 // contexts and may not produce correct results for DDL column definitions.
1259 // The DDL type mappings would need dedicated handling per source/target pair.
1260 }
1261 if let Some(as_select) = ct.as_select.take() {
1262 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1263 }
1264 Expression::CreateTable(ct)
1265 }
1266
1267 // CreateProcedure: recurse into body expressions
1268 Expression::CreateProcedure(mut cp) => {
1269 if let Some(body) = cp.body.take() {
1270 cp.body = Some(match body {
1271 FunctionBody::Expression(expr) => {
1272 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1273 }
1274 FunctionBody::Return(expr) => {
1275 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1276 }
1277 FunctionBody::Statements(stmts) => {
1278 let transformed_stmts = stmts
1279 .into_iter()
1280 .map(|s| transform_recursive(s, transform_fn))
1281 .collect::<Result<Vec<_>>>()?;
1282 FunctionBody::Statements(transformed_stmts)
1283 }
1284 other => other,
1285 });
1286 }
1287 Expression::CreateProcedure(cp)
1288 }
1289
1290 // CreateFunction: recurse into body expressions
1291 Expression::CreateFunction(mut cf) => {
1292 if let Some(body) = cf.body.take() {
1293 cf.body = Some(match body {
1294 FunctionBody::Expression(expr) => {
1295 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1296 }
1297 FunctionBody::Return(expr) => {
1298 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1299 }
1300 FunctionBody::Statements(stmts) => {
1301 let transformed_stmts = stmts
1302 .into_iter()
1303 .map(|s| transform_recursive(s, transform_fn))
1304 .collect::<Result<Vec<_>>>()?;
1305 FunctionBody::Statements(transformed_stmts)
1306 }
1307 other => other,
1308 });
1309 }
1310 Expression::CreateFunction(cf)
1311 }
1312
1313 // MemberOf: recurse into left and right operands
1314 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1315 // ArrayContainsAll (@>): recurse into left and right operands
1316 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1317 // ArrayContainedBy (<@): recurse into left and right operands
1318 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1319 // ArrayOverlaps (&&): recurse into left and right operands
1320 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1321 // TsMatch (@@): recurse into left and right operands
1322 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1323 // Adjacent (-|-): recurse into left and right operands
1324 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1325
1326 // Table: recurse into when (HistoricalData) and changes fields
1327 Expression::Table(mut t) => {
1328 if let Some(when) = t.when.take() {
1329 let transformed =
1330 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1331 if let Expression::HistoricalData(hd) = transformed {
1332 t.when = Some(hd);
1333 }
1334 }
1335 if let Some(changes) = t.changes.take() {
1336 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1337 if let Expression::Changes(c) = transformed {
1338 t.changes = Some(c);
1339 }
1340 }
1341 Expression::Table(t)
1342 }
1343
1344 // HistoricalData (Snowflake time travel): recurse into expression
1345 Expression::HistoricalData(mut hd) => {
1346 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1347 Expression::HistoricalData(hd)
1348 }
1349
1350 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1351 Expression::Changes(mut c) => {
1352 if let Some(at_before) = c.at_before.take() {
1353 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1354 }
1355 if let Some(end) = c.end.take() {
1356 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1357 }
1358 Expression::Changes(c)
1359 }
1360
1361 // TableArgument: TABLE(expr) or MODEL(expr)
1362 Expression::TableArgument(mut ta) => {
1363 ta.this = transform_recursive(ta.this, transform_fn)?;
1364 Expression::TableArgument(ta)
1365 }
1366
1367 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1368 Expression::JoinedTable(mut jt) => {
1369 jt.left = transform_recursive(jt.left, transform_fn)?;
1370 for join in &mut jt.joins {
1371 join.this = transform_recursive(
1372 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1373 transform_fn,
1374 )?;
1375 if let Some(on) = join.on.take() {
1376 join.on = Some(transform_recursive(on, transform_fn)?);
1377 }
1378 }
1379 jt.lateral_views = jt
1380 .lateral_views
1381 .into_iter()
1382 .map(|mut lv| {
1383 lv.this = transform_recursive(lv.this, transform_fn)?;
1384 Ok(lv)
1385 })
1386 .collect::<Result<Vec<_>>>()?;
1387 Expression::JoinedTable(jt)
1388 }
1389
1390 // Lateral: LATERAL func() - recurse into the function expression
1391 Expression::Lateral(mut lat) => {
1392 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1393 Expression::Lateral(lat)
1394 }
1395
1396 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1397 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1398 // as a unit together with the WithinGroup wrapper
1399 Expression::WithinGroup(mut wg) => {
1400 wg.order_by = wg
1401 .order_by
1402 .into_iter()
1403 .map(|mut o| {
1404 let original = o.this.clone();
1405 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1406 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1407 Ok(Expression::Ordered(transformed)) => *transformed,
1408 Ok(_) | Err(_) => o,
1409 }
1410 })
1411 .collect();
1412 Expression::WithinGroup(wg)
1413 }
1414
1415 // Filter: recurse into both the aggregate and the filter condition
1416 Expression::Filter(mut f) => {
1417 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1418 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1419 Expression::Filter(f)
1420 }
1421
1422 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1423 Expression::BitwiseOrAgg(mut f) => {
1424 f.this = transform_recursive(f.this, transform_fn)?;
1425 Expression::BitwiseOrAgg(f)
1426 }
1427 Expression::BitwiseAndAgg(mut f) => {
1428 f.this = transform_recursive(f.this, transform_fn)?;
1429 Expression::BitwiseAndAgg(f)
1430 }
1431 Expression::BitwiseXorAgg(mut f) => {
1432 f.this = transform_recursive(f.this, transform_fn)?;
1433 Expression::BitwiseXorAgg(f)
1434 }
1435 Expression::PipeOperator(mut pipe) => {
1436 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1437 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1438 Expression::PipeOperator(pipe)
1439 }
1440
1441 // Pass through leaf nodes unchanged
1442 other => other,
1443 };
1444
1445 // Then apply the transform function
1446 transform_fn(expr)
1447}
1448
1449/// Returns the tokenizer config, generator config, and expression transform closure
1450/// for a built-in dialect type. This is the shared implementation used by both
1451/// `Dialect::get()` and custom dialect construction.
1452fn configs_for_dialect_type(
1453 dt: DialectType,
1454) -> (
1455 TokenizerConfig,
1456 GeneratorConfig,
1457 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
1458) {
1459 macro_rules! dialect_configs {
1460 ($dialect_struct:ident) => {{
1461 let d = $dialect_struct;
1462 (
1463 d.tokenizer_config(),
1464 d.generator_config(),
1465 Box::new(move |e| $dialect_struct.transform_expr(e)),
1466 )
1467 }};
1468 }
1469 match dt {
1470 DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
1471 DialectType::MySQL => dialect_configs!(MySQLDialect),
1472 DialectType::BigQuery => dialect_configs!(BigQueryDialect),
1473 DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
1474 DialectType::DuckDB => dialect_configs!(DuckDBDialect),
1475 DialectType::TSQL => dialect_configs!(TSQLDialect),
1476 DialectType::Oracle => dialect_configs!(OracleDialect),
1477 DialectType::Hive => dialect_configs!(HiveDialect),
1478 DialectType::Spark => dialect_configs!(SparkDialect),
1479 DialectType::SQLite => dialect_configs!(SQLiteDialect),
1480 DialectType::Presto => dialect_configs!(PrestoDialect),
1481 DialectType::Trino => dialect_configs!(TrinoDialect),
1482 DialectType::Redshift => dialect_configs!(RedshiftDialect),
1483 DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
1484 DialectType::Databricks => dialect_configs!(DatabricksDialect),
1485 DialectType::Athena => dialect_configs!(AthenaDialect),
1486 DialectType::Teradata => dialect_configs!(TeradataDialect),
1487 DialectType::Doris => dialect_configs!(DorisDialect),
1488 DialectType::StarRocks => dialect_configs!(StarRocksDialect),
1489 DialectType::Materialize => dialect_configs!(MaterializeDialect),
1490 DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
1491 DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
1492 DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
1493 DialectType::TiDB => dialect_configs!(TiDBDialect),
1494 DialectType::Druid => dialect_configs!(DruidDialect),
1495 DialectType::Solr => dialect_configs!(SolrDialect),
1496 DialectType::Tableau => dialect_configs!(TableauDialect),
1497 DialectType::Dune => dialect_configs!(DuneDialect),
1498 DialectType::Fabric => dialect_configs!(FabricDialect),
1499 DialectType::Drill => dialect_configs!(DrillDialect),
1500 DialectType::Dremio => dialect_configs!(DremioDialect),
1501 DialectType::Exasol => dialect_configs!(ExasolDialect),
1502 DialectType::DataFusion => dialect_configs!(DataFusionDialect),
1503 _ => dialect_configs!(GenericDialect),
1504 }
1505}
1506
1507// ---------------------------------------------------------------------------
1508// Custom dialect registry
1509// ---------------------------------------------------------------------------
1510
/// Global registry of user-registered custom dialects, keyed by dialect name.
/// `RwLock` allows concurrent read-side lookups; `Arc` lets a `Dialect` instance
/// share a registered config without cloning it out of the map.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1513
/// Immutable configuration for a registered custom dialect, stored in the
/// global registry and shared via `Arc`.
struct CustomDialectConfig {
    /// Registry key; must not collide with a built-in dialect name.
    name: String,
    /// Built-in dialect this custom dialect inherits its configuration from.
    base_dialect: DialectType,
    /// Tokenizer settings (base dialect's, possibly modified by the builder).
    tokenizer_config: TokenizerConfig,
    /// Generator settings (base dialect's, possibly modified by the builder).
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; replaces the base dialect's when set.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass; replaces the base dialect's when set.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1522
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect whose configuration is used as the starting point.
    base_dialect: DialectType,
    /// One-shot tweak applied to the inherited tokenizer config at `register()` time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot tweak applied to the inherited generator config at `register()` time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform that replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass that replaces the base dialect's.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1558
1559impl CustomDialectBuilder {
1560 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1561 pub fn new(name: impl Into<String>) -> Self {
1562 Self {
1563 name: name.into(),
1564 base_dialect: DialectType::Generic,
1565 tokenizer_modifier: None,
1566 generator_modifier: None,
1567 transform: None,
1568 preprocess: None,
1569 }
1570 }
1571
1572 /// Set the base built-in dialect to inherit configuration from.
1573 pub fn based_on(mut self, dialect: DialectType) -> Self {
1574 self.base_dialect = dialect;
1575 self
1576 }
1577
1578 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1579 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1580 where
1581 F: FnOnce(&mut TokenizerConfig) + 'static,
1582 {
1583 self.tokenizer_modifier = Some(Box::new(f));
1584 self
1585 }
1586
1587 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1588 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1589 where
1590 F: FnOnce(&mut GeneratorConfig) + 'static,
1591 {
1592 self.generator_modifier = Some(Box::new(f));
1593 self
1594 }
1595
1596 /// Set a custom per-node expression transform function.
1597 ///
1598 /// This replaces the base dialect's transform. It is called on every expression
1599 /// node during the recursive transform pass.
1600 pub fn transform_fn<F>(mut self, f: F) -> Self
1601 where
1602 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1603 {
1604 self.transform = Some(Arc::new(f));
1605 self
1606 }
1607
1608 /// Set a custom whole-tree preprocessing function.
1609 ///
1610 /// This replaces the base dialect's built-in preprocessing. It is called once
1611 /// on the entire expression tree before the recursive per-node transform.
1612 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1613 where
1614 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1615 {
1616 self.preprocess = Some(Arc::new(f));
1617 self
1618 }
1619
1620 /// Build the custom dialect configuration and register it in the global registry.
1621 ///
1622 /// Returns an error if:
1623 /// - The name collides with a built-in dialect name
1624 /// - A custom dialect with the same name is already registered
1625 pub fn register(self) -> Result<()> {
1626 // Reject names that collide with built-in dialects
1627 if DialectType::from_str(&self.name).is_ok() {
1628 return Err(crate::error::Error::parse(format!(
1629 "Cannot register custom dialect '{}': name collides with built-in dialect",
1630 self.name
1631 )));
1632 }
1633
1634 // Get base configs
1635 let (mut tok_config, mut gen_config, _base_transform) =
1636 configs_for_dialect_type(self.base_dialect);
1637
1638 // Apply modifiers
1639 if let Some(tok_mod) = self.tokenizer_modifier {
1640 tok_mod(&mut tok_config);
1641 }
1642 if let Some(gen_mod) = self.generator_modifier {
1643 gen_mod(&mut gen_config);
1644 }
1645
1646 let config = CustomDialectConfig {
1647 name: self.name.clone(),
1648 base_dialect: self.base_dialect,
1649 tokenizer_config: tok_config,
1650 generator_config: gen_config,
1651 transform: self.transform,
1652 preprocess: self.preprocess,
1653 };
1654
1655 register_custom_dialect(config)
1656 }
1657}
1658
1659use std::str::FromStr;
1660
1661fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1662 let mut registry = CUSTOM_DIALECT_REGISTRY
1663 .write()
1664 .map_err(|e| crate::error::Error::parse(format!("Registry lock poisoned: {}", e)))?;
1665
1666 if registry.contains_key(&config.name) {
1667 return Err(crate::error::Error::parse(format!(
1668 "Custom dialect '{}' is already registered",
1669 config.name
1670 )));
1671 }
1672
1673 registry.insert(config.name.clone(), Arc::new(config));
1674 Ok(())
1675}
1676
1677/// Remove a custom dialect from the global registry.
1678///
1679/// Returns `true` if a dialect with that name was found and removed,
1680/// `false` if no such custom dialect existed.
1681pub fn unregister_custom_dialect(name: &str) -> bool {
1682 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1683 registry.remove(name).is_some()
1684 } else {
1685 false
1686 }
1687}
1688
1689fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1690 CUSTOM_DIALECT_REGISTRY
1691 .read()
1692 .ok()
1693 .and_then(|registry| registry.get(name).cloned())
1694}
1695
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The dialect this instance was built for. For custom dialects this holds
    /// the *base* built-in dialect, since custom dialects have no enum variant.
    dialect_type: DialectType,
    /// Tokenizer pre-configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Generator settings used when rendering the AST back to SQL text.
    generator_config: GeneratorConfig,
    /// Per-node expression transform supplied by the dialect implementation
    /// (or by a custom dialect's `transform_fn` override).
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1728
1729impl Dialect {
1730 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1731 ///
1732 /// This is the primary constructor. It initializes the tokenizer, generator config,
1733 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1734 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1735 /// config routing.
1736 pub fn get(dialect_type: DialectType) -> Self {
1737 let (tokenizer_config, generator_config, transformer) =
1738 configs_for_dialect_type(dialect_type);
1739
1740 // Set up expression-specific generator config for hybrid dialects
1741 let generator_config_for_expr: Option<
1742 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1743 > = match dialect_type {
1744 DialectType::Athena => Some(Box::new(|expr| {
1745 AthenaDialect.generator_config_for_expr(expr)
1746 })),
1747 _ => None,
1748 };
1749
1750 Self {
1751 dialect_type,
1752 tokenizer: Tokenizer::new(tokenizer_config),
1753 generator_config,
1754 transformer,
1755 generator_config_for_expr,
1756 custom_preprocess: None,
1757 }
1758 }
1759
1760 /// Look up a dialect by string name.
1761 ///
1762 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1763 /// falls back to the custom dialect registry. Returns `None` if no dialect
1764 /// with the given name exists.
1765 pub fn get_by_name(name: &str) -> Option<Self> {
1766 // Try built-in first
1767 if let Ok(dt) = DialectType::from_str(name) {
1768 return Some(Self::get(dt));
1769 }
1770
1771 // Try custom registry
1772 let config = get_custom_dialect_config(name)?;
1773 Some(Self::from_custom_config(&config))
1774 }
1775
1776 /// Construct a `Dialect` from a custom dialect configuration.
1777 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1778 // Build the transformer: use custom if provided, else use base dialect's
1779 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1780 if let Some(ref custom_transform) = config.transform {
1781 let t = Arc::clone(custom_transform);
1782 Box::new(move |e| t(e))
1783 } else {
1784 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1785 base_transform
1786 };
1787
1788 // Build the custom preprocess: use custom if provided
1789 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1790 config.preprocess.as_ref().map(|p| {
1791 let p = Arc::clone(p);
1792 Box::new(move |e: Expression| p(e))
1793 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1794 });
1795
1796 Self {
1797 dialect_type: config.base_dialect,
1798 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1799 generator_config: config.generator_config.clone(),
1800 transformer,
1801 generator_config_for_expr: None,
1802 custom_preprocess,
1803 }
1804 }
1805
    /// Get the dialect type
    ///
    /// For custom dialects this returns the *base* dialect the configuration
    /// was derived from (see `from_custom_config`).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1810
    /// Get the generator configuration
    ///
    /// This is the dialect-wide default; hybrid dialects may override it per
    /// expression via `get_config_for_expr`.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1815
1816 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1817 ///
1818 /// The input may contain multiple semicolon-separated statements; each one
1819 /// produces a separate element in the returned vector. Tokenization uses
1820 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1821 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1822 let tokens = self.tokenizer.tokenize(sql)?;
1823 let config = crate::parser::ParserConfig {
1824 dialect: Some(self.dialect_type),
1825 ..Default::default()
1826 };
1827 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1828 parser.parse()
1829 }
1830
1831 /// Get the generator config for a specific expression (supports hybrid dialects)
1832 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1833 if let Some(ref config_fn) = self.generator_config_for_expr {
1834 config_fn(expr)
1835 } else {
1836 self.generator_config.clone()
1837 }
1838 }
1839
1840 /// Generates a SQL string from an [`Expression`] AST node.
1841 ///
1842 /// The output uses this dialect's generator configuration for identifier quoting,
1843 /// keyword casing, function name normalization, and syntax style. The result is
1844 /// a single-line (non-pretty) SQL string.
1845 pub fn generate(&self, expr: &Expression) -> Result<String> {
1846 let config = self.get_config_for_expr(expr);
1847 let mut generator = Generator::with_config(config);
1848 generator.generate(expr)
1849 }
1850
1851 /// Generate SQL from an expression with pretty printing enabled
1852 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1853 let mut config = self.get_config_for_expr(expr);
1854 config.pretty = true;
1855 let mut generator = Generator::with_config(config);
1856 generator.generate(expr)
1857 }
1858
1859 /// Generate SQL from an expression with source dialect info (for transpilation)
1860 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
1861 let mut config = self.get_config_for_expr(expr);
1862 config.source_dialect = Some(source);
1863 let mut generator = Generator::with_config(config);
1864 generator.generate(expr)
1865 }
1866
1867 /// Generate SQL from an expression with pretty printing and source dialect info
1868 pub fn generate_pretty_with_source(
1869 &self,
1870 expr: &Expression,
1871 source: DialectType,
1872 ) -> Result<String> {
1873 let mut config = self.get_config_for_expr(expr);
1874 config.pretty = true;
1875 config.source_dialect = Some(source);
1876 let mut generator = Generator::with_config(config);
1877 generator.generate(expr)
1878 }
1879
1880 /// Generate SQL from an expression with forced identifier quoting (identify=True)
1881 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
1882 let mut config = self.get_config_for_expr(expr);
1883 config.always_quote_identifiers = true;
1884 let mut generator = Generator::with_config(config);
1885 generator.generate(expr)
1886 }
1887
1888 /// Generate SQL from an expression with pretty printing and forced identifier quoting
1889 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
1890 let mut config = self.generator_config.clone();
1891 config.pretty = true;
1892 config.always_quote_identifiers = true;
1893 let mut generator = Generator::with_config(config);
1894 generator.generate(expr)
1895 }
1896
1897 /// Generate SQL from an expression with caller-specified config overrides
1898 pub fn generate_with_overrides(
1899 &self,
1900 expr: &Expression,
1901 overrides: impl FnOnce(&mut GeneratorConfig),
1902 ) -> Result<String> {
1903 let mut config = self.get_config_for_expr(expr);
1904 overrides(&mut config);
1905 let mut generator = Generator::with_config(config);
1906 generator.generate(expr)
1907 }
1908
1909 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
1910 ///
1911 /// The transformation proceeds in two phases:
1912 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
1913 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
1914 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
1915 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
1916 ///
1917 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
1918 /// and for identity transforms (normalizing SQL within the same dialect).
1919 pub fn transform(&self, expr: Expression) -> Result<Expression> {
1920 // Apply preprocessing transforms based on dialect
1921 let preprocessed = self.preprocess(expr)?;
1922 // Then apply recursive transformation
1923 transform_recursive(preprocessed, &self.transformer)
1924 }
1925
    /// Apply dialect-specific preprocessing transforms
    ///
    /// Runs whole-tree structural rewrites (QUALIFY elimination, CTE hoisting,
    /// GENERATE_DATE_ARRAY expansion, ...) that must happen before the
    /// per-node recursive transform in `transform`. A custom dialect's
    /// registered preprocess function fully replaces this built-in logic.
    /// The order of the `transforms::*` calls within each arm is significant:
    /// later rewrites assume the shapes produced by earlier ones.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2082
    /// Transpile SQL from this dialect to another
    ///
    /// Parses `sql` in this (source) dialect, normalizes/transforms the AST for
    /// `target`, and renders one output string per input statement. Output is
    /// single-line; see `transpile_to_pretty` for pretty-printed output.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2087
    /// Transpile SQL from this dialect to another with pretty printing enabled
    ///
    /// Same pipeline as `transpile_to`, but the generated SQL is formatted
    /// across multiple lines.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2092
    /// Shared implementation behind `transpile_to` and `transpile_to_pretty`.
    ///
    /// Per statement, runs an ordered pipeline: parse in the source dialect,
    /// apply a chain of source/target-pair-specific AST normalizations
    /// (DuckDB VARCHAR->TEXT, TSQL ISNULL/JSON unwrapping, Snowflake
    /// CURRENT_TIME and REPEAT rewrites, BigQuery struct/UNNEST handling,
    /// DISTINCT ON elimination, UNION ORDER BY/LIMIT wrapping,
    /// COUNT -> COUNT_BIG for TSQL targets), then apply the target dialect's
    /// transform and generate the final SQL text. The ordering of the steps is
    /// deliberate; several later rewrites depend on shapes produced earlier.
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // Finally: run the target dialect's own transform and render.
                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2390
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    ///   SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    ///   SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    ///   FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Also handles ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) via
    /// `convert_array_size_gda_snowflake`, and falls back to
    /// `try_transform_from_gda_snowflake` when the UNNEST appears in the FROM
    /// clause rather than a JOIN. Only the first matching join is rewritten.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT nodes can carry the join/FROM patterns handled below.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            if matches!(
                                                upper.as_str(),
                                                "YEAR"
                                                    | "QUARTER"
                                                    | "MONTH"
                                                    | "WEEK"
                                                    | "DAY"
                                                    | "HOUR"
                                                    | "MINUTE"
                                                    | "SECOND"
                                            ) {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Stop at the first matching join; only one is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));
            let paren_inner = Expression::Paren(Box::new(Paren {
                this: minus_one,
                trailing_comments: vec![],
            }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Literal::Number("0".to_string())),
                    outer_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2646
2647 /// Helper: replace column references to `alias_name` with dateadd expression
2648 fn replace_column_ref_with_dateadd(
2649 expr: &Expression,
2650 alias_name: &str,
2651 dateadd: &Expression,
2652 ) -> Expression {
2653 use crate::expressions::*;
2654 match expr {
2655 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2656 // Plain column reference -> DATEADD(...) AS alias_name
2657 Expression::Alias(Box::new(Alias {
2658 this: dateadd.clone(),
2659 alias: Identifier::new(alias_name),
2660 column_aliases: vec![],
2661 pre_alias_comments: vec![],
2662 trailing_comments: vec![],
2663 }))
2664 }
2665 Expression::Alias(a) => {
2666 // Check if the inner expression references the alias
2667 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2668 Expression::Alias(Box::new(Alias {
2669 this: new_this,
2670 alias: a.alias.clone(),
2671 column_aliases: a.column_aliases.clone(),
2672 pre_alias_comments: a.pre_alias_comments.clone(),
2673 trailing_comments: a.trailing_comments.clone(),
2674 }))
2675 }
2676 _ => expr.clone(),
2677 }
2678 }
2679
2680 /// Helper: replace column references in inner expression (not top-level)
2681 fn replace_column_ref_inner(
2682 expr: &Expression,
2683 alias_name: &str,
2684 dateadd: &Expression,
2685 ) -> Expression {
2686 use crate::expressions::*;
2687 match expr {
2688 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2689 dateadd.clone()
2690 }
2691 Expression::Add(op) => {
2692 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2693 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2694 Expression::Add(Box::new(BinaryOp {
2695 left,
2696 right,
2697 left_comments: op.left_comments.clone(),
2698 operator_comments: op.operator_comments.clone(),
2699 trailing_comments: op.trailing_comments.clone(),
2700 }))
2701 }
2702 Expression::Sub(op) => {
2703 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2704 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2705 Expression::Sub(Box::new(BinaryOp {
2706 left,
2707 right,
2708 left_comments: op.left_comments.clone(),
2709 operator_comments: op.operator_comments.clone(),
2710 trailing_comments: op.trailing_comments.clone(),
2711 }))
2712 }
2713 Expression::Mul(op) => {
2714 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2715 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2716 Expression::Mul(Box::new(BinaryOp {
2717 left,
2718 right,
2719 left_comments: op.left_comments.clone(),
2720 operator_comments: op.operator_comments.clone(),
2721 trailing_comments: op.trailing_comments.clone(),
2722 }))
2723 }
2724 _ => expr.clone(),
2725 }
2726 }
2727
/// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
/// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
///
/// Scans `sel.from` for either a bare `UNNEST(GENERATE_DATE_ARRAY(start, end[, step]))`
/// or an aliased form `UNNEST(GENERATE_DATE_ARRAY(...)) AS _q(col)`. Only the first
/// match is rewritten (the scan breaks after it); if no match is found, or the step
/// interval's unit cannot be determined, the SELECT is returned unchanged.
fn try_transform_from_gda_snowflake(
    mut sel: Box<crate::expressions::Select>,
) -> Result<Expression> {
    use crate::expressions::*;

    // Extract GDA info from FROM clause
    let mut gda_info: Option<(
        usize,
        String,
        Expression,
        Expression,
        String,
        Option<(String, Vec<Identifier>)>,
    )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

    if let Some(ref from) = sel.from {
        for (idx, table_expr) in from.expressions.iter().enumerate() {
            // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
            // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
            let (unnest_opt, outer_alias_info) = match table_expr {
                Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                Expression::Alias(ref a) => {
                    if let Expression::Unnest(ref unnest) = a.this {
                        let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                        (Some(unnest.as_ref()), Some(alias_info))
                    } else {
                        (None, None)
                    }
                }
                _ => (None, None),
            };

            if let Some(unnest) = unnest_opt {
                // Check for GENERATE_DATE_ARRAY function.
                // Note the >= 2 guard: start and end are mandatory, step optional.
                let func_opt = match &unnest.this {
                    Expression::Function(ref f)
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                            && f.args.len() >= 2 =>
                    {
                        Some(f)
                    }
                    // Also check for GenerateSeries (from earlier normalization)
                    _ => None,
                };

                if let Some(f) = func_opt {
                    let start_expr = f.args[0].clone();
                    let end_expr = f.args[1].clone();
                    let step = f.args.get(2).cloned();

                    // Extract unit and column name
                    let unit = Self::extract_interval_unit_str(&step);
                    // The produced column name comes from the first column alias
                    // of the outer alias (e.g. `AS _q(date_week)`), defaulting
                    // to "value" when unaliased.
                    let col_name = outer_alias_info
                        .as_ref()
                        .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                        .unwrap_or_else(|| "value".to_string());

                    // Only rewrite when a unit could be determined; otherwise
                    // keep scanning (and ultimately leave the SELECT alone).
                    if let Some(unit_str) = unit {
                        gda_info = Some((
                            idx,
                            col_name,
                            start_expr,
                            end_expr,
                            unit_str,
                            outer_alias_info,
                        ));
                        break;
                    }
                }
            }
        }
    }

    // Nothing matched: hand the SELECT back untouched.
    let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
    else {
        return Ok(Expression::Select(sel));
    };

    // Build the Snowflake subquery:
    // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
    //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

    // DATEDIFF(unit, start, end)
    // The unit is emitted as a bare identifier (Column), matching Snowflake's
    // unquoted date-part argument syntax.
    let datediff = Expression::Function(Box::new(Function::new(
        "DATEDIFF".to_string(),
        vec![
            Expression::Column(Column {
                name: Identifier::new(&unit_str),
                table: None,
                join_mark: false,
                trailing_comments: vec![],
            }),
            start_expr.clone(),
            end_expr.clone(),
        ],
    )));
    // (DATEDIFF(...) + 1 - 1) + 1
    // NOTE(review): the `+ 1 - 1` before the final `+ 1` looks redundant but is
    // presumably kept so the generated SQL text matches expected output — confirm
    // against the transpilation fixtures before simplifying.
    let plus_one = Expression::Add(Box::new(BinaryOp {
        left: datediff,
        right: Expression::Literal(Literal::Number("1".to_string())),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
    }));
    let minus_one = Expression::Sub(Box::new(BinaryOp {
        left: plus_one,
        right: Expression::Literal(Literal::Number("1".to_string())),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
    }));
    let paren_inner = Expression::Paren(Box::new(Paren {
        this: minus_one,
        trailing_comments: vec![],
    }));
    let outer_plus_one = Expression::Add(Box::new(BinaryOp {
        left: paren_inner,
        right: Expression::Literal(Literal::Number("1".to_string())),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
    }));

    // ARRAY_GENERATE_RANGE(0, (DATEDIFF(...) + 1 - 1) + 1) — a 0-based index
    // array with one element per generated date.
    let array_gen_range = Expression::Function(Box::new(Function::new(
        "ARRAY_GENERATE_RANGE".to_string(),
        vec![
            Expression::Literal(Literal::Number("0".to_string())),
            outer_plus_one,
        ],
    )));

    // TABLE(FLATTEN(INPUT => ...))
    let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
        name: Identifier::new("INPUT"),
        value: array_gen_range,
        separator: crate::expressions::NamedArgSeparator::DArrow,
    }));
    let flatten = Expression::Function(Box::new(Function::new(
        "FLATTEN".to_string(),
        vec![flatten_input],
    )));

    // Determine alias name for the table: use outer alias or _t0
    let table_alias_name = outer_alias_info
        .as_ref()
        .map(|(name, _)| name.clone())
        .unwrap_or_else(|| "_t0".to_string());

    // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
    // The six column aliases map onto FLATTEN's fixed output columns, with the
    // VALUE position renamed to the requested column name.
    let table_func =
        Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
    let flatten_aliased = Expression::Alias(Box::new(Alias {
        this: table_func,
        alias: Identifier::new(&table_alias_name),
        column_aliases: vec![
            Identifier::new("seq"),
            Identifier::new("key"),
            Identifier::new("path"),
            Identifier::new("index"),
            Identifier::new(&col_name),
            Identifier::new("this"),
        ],
        pre_alias_comments: vec![],
        trailing_comments: vec![],
    }));

    // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
    let dateadd_expr = Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![
            Expression::Column(Column {
                name: Identifier::new(&unit_str),
                table: None,
                join_mark: false,
                trailing_comments: vec![],
            }),
            Expression::Cast(Box::new(Cast {
                this: Expression::Column(Column {
                    name: Identifier::new(&col_name),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })),
            // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
            start_expr.clone(),
        ],
    )));
    let dateadd_aliased = Expression::Alias(Box::new(Alias {
        this: dateadd_expr,
        alias: Identifier::new(&col_name),
        column_aliases: vec![],
        pre_alias_comments: vec![],
        trailing_comments: vec![],
    }));

    // Build inner SELECT
    let mut inner_select = Select::new();
    inner_select.expressions = vec![dateadd_aliased];
    inner_select.from = Some(From {
        expressions: vec![flatten_aliased],
    });

    // Wrap the inner SELECT in a plain (unaliased) subquery.
    let inner_select_expr = Expression::Select(Box::new(inner_select));
    let subquery = Expression::Subquery(Box::new(Subquery {
        this: inner_select_expr,
        alias: None,
        column_aliases: vec![],
        order_by: None,
        limit: None,
        offset: None,
        distribute_by: None,
        sort_by: None,
        cluster_by: None,
        lateral: false,
        modifiers_inside: false,
        trailing_comments: vec![],
    }));

    // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
    let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
        Expression::Alias(Box::new(Alias {
            this: subquery,
            alias: Identifier::new(&alias_name),
            column_aliases: col_aliases,
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }))
    } else {
        subquery
    };

    // Replace the FROM expression in place at the index found during the scan.
    if let Some(ref mut from) = sel.from {
        from.expressions[from_idx] = replacement;
    }

    Ok(Expression::Select(sel))
}
2977
2978 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
2979 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
2980 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
2981 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
2982 use crate::expressions::*;
2983
2984 let start_expr = f.args[0].clone();
2985 let end_expr = f.args[1].clone();
2986 let step = f.args.get(2).cloned();
2987 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
2988 let col_name = "value";
2989
2990 // Build the inner subquery: same as try_transform_from_gda_snowflake
2991 let datediff = Expression::Function(Box::new(Function::new(
2992 "DATEDIFF".to_string(),
2993 vec![
2994 Expression::Column(Column {
2995 name: Identifier::new(&unit_str),
2996 table: None,
2997 join_mark: false,
2998 trailing_comments: vec![],
2999 }),
3000 start_expr.clone(),
3001 end_expr.clone(),
3002 ],
3003 )));
3004 let plus_one = Expression::Add(Box::new(BinaryOp {
3005 left: datediff,
3006 right: Expression::Literal(Literal::Number("1".to_string())),
3007 left_comments: vec![],
3008 operator_comments: vec![],
3009 trailing_comments: vec![],
3010 }));
3011 let minus_one = Expression::Sub(Box::new(BinaryOp {
3012 left: plus_one,
3013 right: Expression::Literal(Literal::Number("1".to_string())),
3014 left_comments: vec![],
3015 operator_comments: vec![],
3016 trailing_comments: vec![],
3017 }));
3018 let paren_inner = Expression::Paren(Box::new(Paren {
3019 this: minus_one,
3020 trailing_comments: vec![],
3021 }));
3022 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
3023 left: paren_inner,
3024 right: Expression::Literal(Literal::Number("1".to_string())),
3025 left_comments: vec![],
3026 operator_comments: vec![],
3027 trailing_comments: vec![],
3028 }));
3029
3030 let array_gen_range = Expression::Function(Box::new(Function::new(
3031 "ARRAY_GENERATE_RANGE".to_string(),
3032 vec![
3033 Expression::Literal(Literal::Number("0".to_string())),
3034 outer_plus_one,
3035 ],
3036 )));
3037
3038 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3039 name: Identifier::new("INPUT"),
3040 value: array_gen_range,
3041 separator: crate::expressions::NamedArgSeparator::DArrow,
3042 }));
3043 let flatten = Expression::Function(Box::new(Function::new(
3044 "FLATTEN".to_string(),
3045 vec![flatten_input],
3046 )));
3047
3048 let table_func =
3049 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3050 let flatten_aliased = Expression::Alias(Box::new(Alias {
3051 this: table_func,
3052 alias: Identifier::new("_t0"),
3053 column_aliases: vec![
3054 Identifier::new("seq"),
3055 Identifier::new("key"),
3056 Identifier::new("path"),
3057 Identifier::new("index"),
3058 Identifier::new(col_name),
3059 Identifier::new("this"),
3060 ],
3061 pre_alias_comments: vec![],
3062 trailing_comments: vec![],
3063 }));
3064
3065 let dateadd_expr = Expression::Function(Box::new(Function::new(
3066 "DATEADD".to_string(),
3067 vec![
3068 Expression::Column(Column {
3069 name: Identifier::new(&unit_str),
3070 table: None,
3071 join_mark: false,
3072 trailing_comments: vec![],
3073 }),
3074 Expression::Cast(Box::new(Cast {
3075 this: Expression::Column(Column {
3076 name: Identifier::new(col_name),
3077 table: None,
3078 join_mark: false,
3079 trailing_comments: vec![],
3080 }),
3081 to: DataType::Int {
3082 length: None,
3083 integer_spelling: false,
3084 },
3085 trailing_comments: vec![],
3086 double_colon_syntax: false,
3087 format: None,
3088 default: None,
3089 })),
3090 start_expr.clone(),
3091 ],
3092 )));
3093 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3094 this: dateadd_expr,
3095 alias: Identifier::new(col_name),
3096 column_aliases: vec![],
3097 pre_alias_comments: vec![],
3098 trailing_comments: vec![],
3099 }));
3100
3101 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
3102 let mut inner_select = Select::new();
3103 inner_select.expressions = vec![dateadd_aliased];
3104 inner_select.from = Some(From {
3105 expressions: vec![flatten_aliased],
3106 });
3107
3108 // Wrap in subquery for the inner part
3109 let inner_subquery = Expression::Subquery(Box::new(Subquery {
3110 this: Expression::Select(Box::new(inner_select)),
3111 alias: None,
3112 column_aliases: vec![],
3113 order_by: None,
3114 limit: None,
3115 offset: None,
3116 distribute_by: None,
3117 sort_by: None,
3118 cluster_by: None,
3119 lateral: false,
3120 modifiers_inside: false,
3121 trailing_comments: vec![],
3122 }));
3123
3124 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
3125 let star = Expression::Star(Star {
3126 table: None,
3127 except: None,
3128 replace: None,
3129 rename: None,
3130 trailing_comments: vec![],
3131 });
3132 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
3133 this: star,
3134 distinct: false,
3135 filter: None,
3136 order_by: vec![],
3137 name: Some("ARRAY_AGG".to_string()),
3138 ignore_nulls: None,
3139 having_max: None,
3140 limit: None,
3141 }));
3142
3143 let mut outer_select = Select::new();
3144 outer_select.expressions = vec![array_agg];
3145 outer_select.from = Some(From {
3146 expressions: vec![inner_subquery],
3147 });
3148
3149 // Wrap in a subquery
3150 let outer_subquery = Expression::Subquery(Box::new(Subquery {
3151 this: Expression::Select(Box::new(outer_select)),
3152 alias: None,
3153 column_aliases: vec![],
3154 order_by: None,
3155 limit: None,
3156 offset: None,
3157 distribute_by: None,
3158 sort_by: None,
3159 cluster_by: None,
3160 lateral: false,
3161 modifiers_inside: false,
3162 trailing_comments: vec![],
3163 }));
3164
3165 // ARRAY_SIZE(subquery)
3166 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
3167 outer_subquery,
3168 ))))
3169 }
3170
3171 /// Extract interval unit string from an optional step expression.
3172 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3173 use crate::expressions::*;
3174 if let Some(Expression::Interval(ref iv)) = step {
3175 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3176 return Some(format!("{:?}", unit).to_uppercase());
3177 }
3178 if let Some(ref this) = iv.this {
3179 if let Expression::Literal(Literal::String(ref s)) = this {
3180 let parts: Vec<&str> = s.split_whitespace().collect();
3181 if parts.len() == 2 {
3182 return Some(parts[1].to_uppercase());
3183 } else if parts.len() == 1 {
3184 let upper = parts[0].to_uppercase();
3185 if matches!(
3186 upper.as_str(),
3187 "YEAR"
3188 | "QUARTER"
3189 | "MONTH"
3190 | "WEEK"
3191 | "DAY"
3192 | "HOUR"
3193 | "MINUTE"
3194 | "SECOND"
3195 ) {
3196 return Some(upper);
3197 }
3198 }
3199 }
3200 }
3201 }
3202 // Default to DAY if no step or no interval
3203 if step.is_none() {
3204 return Some("DAY".to_string());
3205 }
3206 None
3207 }
3208
/// Post-process pretty-printed Snowflake SQL for one known transpilation shape.
///
/// This is a targeted, purely text-level fixup: it fires only when the output
/// contains BOTH marker substrings below (the `LATERAL IFF ... AS datasource`
/// alias and the `ARRAY_GENERATE_RANGE` over `PARSE_JSON(flags)`), i.e. the
/// output of one specific array-unnest rewrite. When it fires, three fragments
/// are re-wrapped with the line breaks and indentation the pretty printer is
/// expected to produce; all other input passes through unchanged.
///
/// NOTE(review): the search strings must match the generator's output
/// byte-for-byte (including interior `\n` indentation) — presumably these are
/// pinned by transpilation fixtures; confirm before editing any literal.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
        && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
    {
        // Re-break the `<> ALL (SELECT ...)` subquery across indented lines.
        sql = sql.replace(
            "AND uc.user_id <> ALL (SELECT DISTINCT\n  _id\nFROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\nWHERE\n  GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n  SELECT DISTINCT\n    _id\n  FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n  WHERE\n    GET_PATH(datasource.value, 'name') = 'something'\n)",
        );

        // Break the GREATEST(...) operand of ARRAY_GENERATE_RANGE onto its own line.
        sql = sql.replace(
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n  GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        );

        // Expand the OR (... AND ...) predicate into one condition per line.
        sql = sql.replace(
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n  AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n  _u.pos > (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n  AND _u_2.pos_2 = (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n)",
        );
    }

    sql
}
3231
3232 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3233 /// This handles cases where the same syntax has different semantics across dialects.
3234 fn cross_dialect_normalize(
3235 expr: Expression,
3236 source: DialectType,
3237 target: DialectType,
3238 ) -> Result<Expression> {
3239 use crate::expressions::{
3240 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3241 Function, Identifier, IsNull, Literal, Null, Paren,
3242 };
3243
3244 // Helper to tag which kind of transform to apply
3245 #[derive(Debug)]
3246 enum Action {
3247 None,
3248 GreatestLeastNull,
3249 ArrayGenerateRange,
3250 Div0TypedDivision,
3251 ArrayAggCollectList,
3252 ArrayAggWithinGroupFilter,
3253 ArrayAggFilter,
3254 CastTimestampToDatetime,
3255 DateTruncWrapCast,
3256 ToDateToCast,
3257 ConvertTimezoneToExpr,
3258 SetToVariable,
3259 RegexpReplaceSnowflakeToDuckDB,
3260 BigQueryFunctionNormalize,
3261 BigQuerySafeDivide,
3262 BigQueryCastType,
3263 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3264 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3265 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3266 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3267 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3268 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3269 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3270 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3271 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3272 EpochConvert, // Expression::Epoch -> target-specific epoch function
3273 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3274 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3275 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3276 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3277 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3278 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3279 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3280 TempTableHash, // TSQL #table -> temp table normalization
3281 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3282 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3283 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3284 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3285 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3286 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3287 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3288 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3289 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3290 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3291 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3292 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3293 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3294 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3295 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3296 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3297 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3298 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3299 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3300 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3301 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3302 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3303 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3304 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3305 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3306 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3307 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3308 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3309 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3310 DollarParamConvert, // $foo -> @foo for BigQuery
3311 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3312 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3313 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3314 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3315 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3316 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3317 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3318 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3319 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3320 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3321 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3322 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3323 RespectNullsConvert, // RESPECT NULLS window function handling
3324 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3325 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3326 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3327 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3328 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3329 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3330 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3331 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3332 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3333 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3334 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3335 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3336 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3337 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3338 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3339 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3340 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3341 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3342 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3343 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3344 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3345 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3346 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3347 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3348 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3349 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3350 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3351 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3352 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3353 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3354 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3355 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3356 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3357 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3358 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3359 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3360 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3361 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3362 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3363 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3364 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3365 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3366 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3367 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3368 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3369 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3370 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3371 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3372 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3373 ArraySumConvert, // ARRAY_SUM -> target-specific
3374 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3375 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3376 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3377 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3378 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3379 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3380 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3381 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3382 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3383 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3384 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3385 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3386 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3387 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3388 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3389 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3390 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3391 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3392 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3393 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3394 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3395 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3396 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3397 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3398 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3399 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3400 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3401 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3402 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3403 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3404 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3405 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3406 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3407 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3408 }
3409
        // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
        // Only TSQL/Fabric sources can produce SELECT ... INTO, so gate on source.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };

        // Strip OFFSET ROWS for non-TSQL/Oracle targets
        // (only TSQL, Oracle and Fabric spell the trailing ROWS keyword; clearing
        // `rows` makes the generator emit a bare OFFSET n).
        let expr = if !matches!(
            target,
            DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };

        // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
        // (Oracle has no LIMIT clause; row limiting uses FETCH FIRST n ROWS ONLY.)
        let expr = if matches!(target, DialectType::Oracle) {
            if let Expression::Select(mut select) = expr {
                if let Some(limit) = select.limit.take() {
                    // Convert LIMIT to FETCH FIRST n ROWS ONLY
                    select.fetch = Some(crate::expressions::Fetch {
                        direction: "FIRST".to_string(),
                        count: Some(limit.this),
                        percent: false,
                        rows: true,
                        with_ties: false,
                    });
                }
                // Add ROWS to OFFSET if present (Oracle requires OFFSET n ROWS)
                if let Some(ref mut offset) = select.offset {
                    offset.rows = Some(true);
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3458
        // Handle CreateTable WITH properties transformation before recursive transforms
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            // (Hive declares partition columns with full types OUTSIDE the main column list;
            // most other dialects expect them inside it.)
            if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties
                        .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names
                            .iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        // (types were merged into the main column list above).
                        let name_exprs: Vec<Expression> = partition_col_names
                            .iter()
                            .map(|n| {
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new(n.clone()),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                })
                            })
                            .collect();
                        ct.properties.insert(
                            0,
                            Expression::PartitionedByProperty(Box::new(
                                crate::expressions::PartitionedByProperty {
                                    this: Box::new(Expression::Tuple(Box::new(
                                        crate::expressions::Tuple {
                                            expressions: name_exprs,
                                        },
                                    ))),
                                },
                            )),
                        );
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }
3543
            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                ct.properties
                    .retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                ct.constraints.retain(|c| {
                    matches!(
                        c,
                        crate::expressions::TableConstraint::PrimaryKey { .. }
                            | crate::expressions::TableConstraint::Like { .. }
                    )
                });
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey {
                        columns,
                        modifiers,
                        ..
                    } = constraint
                    {
                        // Strip ASC/DESC from column names
                        // (TSQL allows per-column sort direction inside PRIMARY KEY;
                        // the suffix is stored as part of the name string here).
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            // (IDENTITY columns are widened; NOTE(review): presumably because
            // Databricks GENERATED ... AS IDENTITY requires BIGINT — confirm.)
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int {
                        integer_spelling, ..
                    } = &mut col.data_type
                    {
                        // integer_spelling=false makes the generator emit INT, not INTEGER
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            // NOTE(review): Databricks is intentionally(?) excluded here, unlike the
            // neighboring Spark|Databricks groups — confirm.
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints
                        .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some()
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
3636 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
3637 if let crate::expressions::DataType::Array { .. } = dt {
3638 *dt = crate::expressions::DataType::Custom {
3639 name: "ARRAY".to_string(),
3640 };
3641 }
3642 }
                // Apply the collapse to every column's declared type.
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        let has_explicit_not_null = col
                            .constraint_order
                            .iter()
                            .any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
3690
        // Handle CreateView column stripping for Presto/Trino target
        // NOTE(review): Athena is excluded here although it is grouped with
        // Presto/Trino elsewhere in this function — confirm that is intended.
        let expr = if let Expression::CreateView(mut cv) = expr {
            // Presto/Trino: drop column list when view has a SELECT body
            if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
            {
                // Null query means no SELECT body; only strip when a body exists.
                if !matches!(&cv.query, Expression::Null(_)) {
                    cv.columns.clear();
                }
            }
            Expression::CreateView(cv)
        } else {
            expr
        };
3704
        // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
        // (Presto/Trino/Athena accept a bare VALUES as a CTE body; most other dialects
        // require a full SELECT, so we synthesize SELECT * FROM (VALUES ...) AS _values.)
        let expr = if !matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut with) = select.with {
                    for cte in &mut with.ctes {
                        if let Expression::Values(ref vals) = cte.this {
                            // Build: SELECT * FROM (VALUES ...) AS _values
                            let values_subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Values(vals.clone()),
                                    alias: Some(Identifier::new("_values".to_string())),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                }));
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions =
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                })];
                            new_select.from = Some(crate::expressions::From {
                                expressions: vec![values_subquery],
                            });
                            cte.this = Expression::Select(Box::new(new_select));
                        }
                    }
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3753
        // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
        // NOTE(review): this marks every column without an explicit nulls ordering as
        // NULLS FIRST regardless of sort direction — presumably the generator pairs it
        // with the column's direction; confirm against the generator's index output.
        let expr = if matches!(target, DialectType::PostgreSQL) {
            if let Expression::CreateIndex(mut ci) = expr {
                for col in &mut ci.columns {
                    if col.nulls_first.is_none() {
                        col.nulls_first = Some(true);
                    }
                }
                Expression::CreateIndex(ci)
            } else {
                expr
            }
        } else {
            expr
        };
3769
        // Recursive per-node rewrites: the closure is applied to every node of the AST.
        transform_recursive(expr, &|e| {
            // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
            // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Cast(ref c) = e {
                    // Check if this is a CAST of an array to a struct array type
                    let is_struct_array_cast =
                        matches!(&c.to, crate::expressions::DataType::Array { .. });
                    if is_struct_array_cast {
                        // A struct counts as "auto-named" when every field name is either
                        // absent or of the form _<digits> (positional placeholder names).
                        let has_auto_named_structs = match &c.this {
                            Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            _ => false,
                        };
                        if has_auto_named_structs {
                            // Rewrite each struct literal as ROW(v1, v2, ...), dropping field names.
                            let convert_struct_to_row = |elem: Expression| -> Expression {
                                if let Expression::Struct(s) = elem {
                                    let row_args: Vec<Expression> =
                                        s.fields.into_iter().map(|(_, v)| v).collect();
                                    Expression::Function(Box::new(Function::new(
                                        "ROW".to_string(),
                                        row_args,
                                    )))
                                } else {
                                    elem
                                }
                            };
                            let mut c_clone = c.as_ref().clone();
                            match &mut c_clone.this {
                                Expression::Array(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                Expression::ArrayFunc(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                _ => {}
                            }
                            return Ok(Expression::Cast(Box::new(c_clone)));
                        }
                    }
                }
            }
3840
            // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
            // Field names come from the alias if present, else the column name,
            // else are left unnamed.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Select(ref sel) = e {
                    if sel.kind.as_deref() == Some("STRUCT") {
                        let mut fields = Vec::new();
                        for expr in &sel.expressions {
                            match expr {
                                Expression::Alias(a) => {
                                    fields.push((Some(a.alias.name.clone()), a.this.clone()));
                                }
                                Expression::Column(c) => {
                                    fields.push((Some(c.name.name.clone()), expr.clone()));
                                }
                                _ => {
                                    fields.push((None, expr.clone()));
                                }
                            }
                        }
                        let struct_lit =
                            Expression::Struct(Box::new(crate::expressions::Struct { fields }));
                        // Replace the projection with the single struct literal and
                        // clear the AS STRUCT marker.
                        let mut new_select = sel.as_ref().clone();
                        new_select.kind = None;
                        new_select.expressions = vec![struct_lit];
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
3868
            // Convert @variable -> ${variable} for Spark/Hive/Databricks
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                )
            {
                if let Expression::Parameter(ref p) = e {
                    if p.style == crate::expressions::ParameterStyle::At {
                        if let Some(ref name) = p.name {
                            // Keep the name/index, just switch the sigil style.
                            return Ok(Expression::Parameter(Box::new(
                                crate::expressions::Parameter {
                                    name: Some(name.clone()),
                                    index: p.index,
                                    style: crate::expressions::ParameterStyle::DollarBrace,
                                    quoted: p.quoted,
                                    string_quoted: p.string_quoted,
                                    expression: None,
                                },
                            )));
                        }
                    }
                }
                // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
                // (some parse paths leave @-variables as plain columns).
                if let Expression::Column(ref col) = e {
                    if col.name.name.starts_with('@') && col.table.is_none() {
                        let var_name = col.name.name.trim_start_matches('@').to_string();
                        return Ok(Expression::Parameter(Box::new(
                            crate::expressions::Parameter {
                                name: Some(var_name),
                                index: None,
                                style: crate::expressions::ParameterStyle::DollarBrace,
                                quoted: false,
                                string_quoted: false,
                                expression: None,
                            },
                        )));
                    }
                }
            }
3909
            // Convert @variable -> variable in SET statements for Spark/Databricks
            // The SET name may have been parsed as a Parameter, an Identifier, or a
            // Column depending on the parse path; handle all three.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                if let Expression::SetStatement(ref s) = e {
                    let mut new_items = s.items.clone();
                    let mut changed = false;
                    for item in &mut new_items {
                        // Strip @ from the SET name (Parameter style)
                        if let Expression::Parameter(ref p) = item.name {
                            if p.style == crate::expressions::ParameterStyle::At {
                                if let Some(ref name) = p.name {
                                    item.name = Expression::Identifier(Identifier::new(name));
                                    changed = true;
                                }
                            }
                        }
                        // Strip @ from the SET name (Identifier style - SET parser)
                        if let Expression::Identifier(ref id) = item.name {
                            if id.name.starts_with('@') {
                                let var_name = id.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                        // Strip @ from the SET name (Column style - alternative parsing)
                        if let Expression::Column(ref col) = item.name {
                            if col.name.name.starts_with('@') && col.table.is_none() {
                                let var_name = col.name.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                    }
                    // Only rebuild the statement when something actually changed.
                    if changed {
                        let mut new_set = (**s).clone();
                        new_set.items = new_items;
                        return Ok(Expression::SetStatement(Box::new(new_set)));
                    }
                }
            }
3951
            // Strip NOLOCK hint for non-TSQL targets
            // (table hints such as WITH (NOLOCK) are TSQL-only syntax; ALL hints on
            // the table are cleared here, not just NOLOCK).
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                if let Expression::Table(ref tr) = e {
                    if !tr.hints.is_empty() {
                        let mut new_tr = tr.clone();
                        new_tr.hints.clear();
                        return Ok(Expression::Table(new_tr));
                    }
                }
            }
3964
            // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
            // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
            // (safe to fold at transpile time because the operand is a literal,
            // so the NULL-handling difference of IS [NOT] TRUE cannot apply).
            if matches!(target, DialectType::Snowflake) {
                if let Expression::IsTrue(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS TRUE  ==  b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: b.value,
                            }));
                        } else {
                            // b IS NOT TRUE  ==  NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: !b.value,
                            }));
                        }
                    }
                }
                if let Expression::IsFalse(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS FALSE  ==  NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: !b.value,
                            }));
                        } else {
                            // b IS NOT FALSE  ==  b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: b.value,
                            }));
                        }
                    }
                }
            }
3995
            // BigQuery: split dotted backtick identifiers in table names
            // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
            // (BigQuery allows a single backtick-quoted identifier to contain the whole
            // project.dataset.table path; other dialects need it split into parts.)
            // Names with more than 3 dot-separated parts are left untouched.
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::CreateTable(ref ct) = e {
                    let mut changed = false;
                    let mut new_ct = ct.clone();
                    // Split the table name (only when no schema was parsed separately)
                    if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
                        let parts: Vec<&str> = ct.name.name.name.split('.').collect();
                        // Use quoted identifiers when the original was quoted (backtick in BigQuery)
                        let was_quoted = ct.name.name.quoted;
                        let mk_id = |s: &str| {
                            if was_quoted {
                                Identifier::quoted(s)
                            } else {
                                Identifier::new(s)
                            }
                        };
                        if parts.len() == 3 {
                            new_ct.name.catalog = Some(mk_id(parts[0]));
                            new_ct.name.schema = Some(mk_id(parts[1]));
                            new_ct.name.name = mk_id(parts[2]);
                            changed = true;
                        } else if parts.len() == 2 {
                            new_ct.name.schema = Some(mk_id(parts[0]));
                            new_ct.name.name = mk_id(parts[1]);
                            changed = true;
                        }
                    }
                    // Split the clone source name (CREATE TABLE ... CLONE src) the same way
                    if let Some(ref clone_src) = ct.clone_source {
                        if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                            let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                            let was_quoted = clone_src.name.quoted;
                            let mk_id = |s: &str| {
                                if was_quoted {
                                    Identifier::quoted(s)
                                } else {
                                    Identifier::new(s)
                                }
                            };
                            let mut new_src = clone_src.clone();
                            if parts.len() == 3 {
                                new_src.catalog = Some(mk_id(parts[0]));
                                new_src.schema = Some(mk_id(parts[1]));
                                new_src.name = mk_id(parts[2]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            } else if parts.len() == 2 {
                                new_src.schema = Some(mk_id(parts[0]));
                                new_src.name = mk_id(parts[1]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        return Ok(Expression::CreateTable(new_ct));
                    }
                }
            }
4057
            // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
            // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
            if matches!(source, DialectType::BigQuery)
                && matches!(
                    target,
                    DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                )
            {
                if let Expression::Subscript(ref sub) = e {
                    // new_index: the rewritten (1-based) index, or None when no rewrite applies.
                    // is_safe: true for SAFE_* accessors, which must not error out of range.
                    let (new_index, is_safe) = match &sub.index {
                        // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
                        Expression::Literal(Literal::Number(n)) => {
                            if let Ok(val) = n.parse::<i64>() {
                                (
                                    Some(Expression::Literal(Literal::Number(
                                        (val + 1).to_string(),
                                    ))),
                                    false,
                                )
                            } else {
                                // Non-integer literal index: leave the subscript alone.
                                (None, false)
                            }
                        }
                        // OFFSET(n) -> n+1 (0-based)
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
                        {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    // Integer literal: fold the +1 at transpile time.
                                    (
                                        Some(Expression::Literal(Literal::Number(
                                            (val + 1).to_string(),
                                        ))),
                                        false,
                                    )
                                } else {
                                    // Non-integer literal: emit `n + 1` to evaluate at runtime.
                                    (
                                        Some(Expression::Add(Box::new(
                                            crate::expressions::BinaryOp::new(
                                                f.args[0].clone(),
                                                Expression::number(1),
                                            ),
                                        ))),
                                        false,
                                    )
                                }
                            } else {
                                // Arbitrary index expression: emit `expr + 1`.
                                (
                                    Some(Expression::Add(Box::new(
                                        crate::expressions::BinaryOp::new(
                                            f.args[0].clone(),
                                            Expression::number(1),
                                        ),
                                    ))),
                                    false,
                                )
                            }
                        }
                        // ORDINAL(n) -> n (already 1-based)
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
                        {
                            (Some(f.args[0].clone()), false)
                        }
                        // SAFE_OFFSET(n) -> n+1 (0-based, safe)
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
                        {
                            if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                                if let Ok(val) = n.parse::<i64>() {
                                    (
                                        Some(Expression::Literal(Literal::Number(
                                            (val + 1).to_string(),
                                        ))),
                                        true,
                                    )
                                } else {
                                    (
                                        Some(Expression::Add(Box::new(
                                            crate::expressions::BinaryOp::new(
                                                f.args[0].clone(),
                                                Expression::number(1),
                                            ),
                                        ))),
                                        true,
                                    )
                                }
                            } else {
                                (
                                    Some(Expression::Add(Box::new(
                                        crate::expressions::BinaryOp::new(
                                            f.args[0].clone(),
                                            Expression::number(1),
                                        ),
                                    ))),
                                    true,
                                )
                            }
                        }
                        // SAFE_ORDINAL(n) -> n (already 1-based, safe)
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
                        {
                            (Some(f.args[0].clone()), true)
                        }
                        _ => (None, false),
                    };
                    if let Some(idx) = new_index {
                        if is_safe
                            && matches!(
                                target,
                                DialectType::Presto | DialectType::Trino | DialectType::Athena
                            )
                        {
                            // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
                            // (ELEMENT_AT returns NULL out of range, matching SAFE_* semantics)
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ELEMENT_AT".to_string(),
                                vec![sub.this.clone(), idx],
                            ))));
                        } else {
                            // DuckDB or non-safe: just use subscript with converted index
                            return Ok(Expression::Subscript(Box::new(
                                crate::expressions::Subscript {
                                    this: sub.this.clone(),
                                    index: idx,
                                },
                            )));
                        }
                    }
                }
            }
4192
            // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
            // The argument type is unknown at transpile time, so dispatch at runtime:
            // byte length for BLOBs, character length for everything else.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Length(ref uf) = e {
                    let arg = uf.this.clone();
                    // TYPEOF(x) — the CASE operand
                    let typeof_func = Expression::Function(Box::new(Function::new(
                        "TYPEOF".to_string(),
                        vec![arg.clone()],
                    )));
                    // OCTET_LENGTH(CAST(x AS VARBINARY)) — the BLOB branch
                    let blob_cast = Expression::Cast(Box::new(Cast {
                        this: arg.clone(),
                        to: DataType::VarBinary { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let octet_length = Expression::Function(Box::new(Function::new(
                        "OCTET_LENGTH".to_string(),
                        vec![blob_cast],
                    )));
                    // LENGTH(CAST(x AS TEXT)) — the default branch
                    let text_cast = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Text,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
                        this: text_cast,
                        original_name: None,
                    }));
                    return Ok(Expression::Case(Box::new(Case {
                        operand: Some(typeof_func),
                        whens: vec![(
                            Expression::Literal(Literal::String("BLOB".to_string())),
                            octet_length,
                        )],
                        else_: Some(length_text),
                        comments: Vec::new(),
                    })));
                }
            }
4236
            // BigQuery UNNEST alias handling (only for non-BigQuery sources):
            // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
            // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
            if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
                if let Expression::Alias(ref a) = e {
                    if matches!(&a.this, Expression::Unnest(_)) {
                        if a.column_aliases.is_empty() {
                            // Drop the entire alias, return just the UNNEST expression
                            return Ok(a.this.clone());
                        } else {
                            // Use first column alias as the main alias
                            // (any additional column aliases are discarded).
                            let mut new_alias = a.as_ref().clone();
                            new_alias.alias = a.column_aliases[0].clone();
                            new_alias.column_aliases.clear();
                            return Ok(Expression::Alias(Box::new(new_alias)));
                        }
                    }
                }
            }
4256
            // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::In(ref in_expr) = e {
                    if let Some(ref unnest_inner) = in_expr.unnest {
                        // Build the function call for the target dialect
                        let func_expr = if matches!(
                            target,
                            DialectType::Hive | DialectType::Spark | DialectType::Databricks
                        ) {
                            // Use EXPLODE for Hive/Spark
                            Expression::Function(Box::new(Function::new(
                                "EXPLODE".to_string(),
                                vec![*unnest_inner.clone()],
                            )))
                        } else {
                            // Use UNNEST for Presto/Trino/DuckDB/etc.
                            Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                this: *unnest_inner.clone(),
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            }))
                        };

                        // Wrap in SELECT so it can serve as the IN subquery
                        let mut inner_select = crate::expressions::Select::new();
                        inner_select.expressions = vec![func_expr];

                        let subquery_expr = Expression::Select(Box::new(inner_select));

                        // Rebuild the IN with the subquery, clearing the unnest marker;
                        // NOT/GLOBAL flags are carried over unchanged.
                        return Ok(Expression::In(Box::new(crate::expressions::In {
                            this: in_expr.this.clone(),
                            expressions: Vec::new(),
                            query: Some(subquery_expr),
                            not: in_expr.not,
                            global: in_expr.global,
                            unnest: None,
                            is_field: false,
                        })));
                    }
                }
            }
4300
            // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
            // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
            // (SQLite's generate_series table-valued function exposes its output as a
            // single column named "value", so the column alias must be applied via a
            // wrapping SELECT rather than a column-alias list).
            if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
                if let Expression::Alias(ref a) = e {
                    if let Expression::Function(ref f) = a.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
                            && !a.column_aliases.is_empty()
                        {
                            // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                            let col_alias = a.column_aliases[0].clone();
                            let mut inner_select = crate::expressions::Select::new();
                            inner_select.expressions =
                                vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                                    Expression::Identifier(Identifier::new("value".to_string())),
                                    col_alias,
                                )))];
                            inner_select.from = Some(crate::expressions::From {
                                expressions: vec![a.this.clone()],
                            });
                            let subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Select(Box::new(inner_select)),
                                    alias: Some(a.alias.clone()),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                }));
                            return Ok(subquery);
                        }
                    }
                }
            }
4340
4341 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
        // BigQuery comma-join UNNEST shorthand:
        //   SELECT results FROM Coordinates, Coordinates.position AS results
        //   -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
        // BigQuery allows a correlated array path as an additional comma-separated
        // FROM item; this rewrites each such item into an explicit CROSS JOIN so
        // the generator can emit it for any target (including BigQuery itself).
        if matches!(source, DialectType::BigQuery) {
            if let Expression::Select(ref s) = e {
                if let Some(ref from) = s.from {
                    if from.expressions.len() >= 2 {
                        // Collect table names from first expression
                        // (lowercased for case-insensitive matching below).
                        let first_tables: Vec<String> = from
                            .expressions
                            .iter()
                            .take(1)
                            .filter_map(|expr| {
                                if let Expression::Table(t) = expr {
                                    Some(t.name.name.to_lowercase())
                                } else {
                                    None
                                }
                            })
                            .collect();

                        // Check if any subsequent FROM expressions are schema-qualified with a matching table name
                        // or have a dotted name matching a table
                        let mut needs_rewrite = false;
                        for expr in from.expressions.iter().skip(1) {
                            if let Expression::Table(t) = expr {
                                if let Some(ref schema) = t.schema {
                                    if first_tables.contains(&schema.name.to_lowercase()) {
                                        needs_rewrite = true;
                                        break;
                                    }
                                }
                                // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
                                if t.schema.is_none() && t.name.name.contains('.') {
                                    let parts: Vec<&str> = t.name.name.split('.').collect();
                                    if parts.len() >= 2
                                        && first_tables.contains(&parts[0].to_lowercase())
                                    {
                                        needs_rewrite = true;
                                        break;
                                    }
                                }
                            }
                        }

                        if needs_rewrite {
                            // Keep the first FROM item; every later item is either
                            // turned into a CROSS JOIN (array path) or kept in FROM.
                            let mut new_select = s.clone();
                            let mut new_from_exprs = vec![from.expressions[0].clone()];
                            let mut new_joins = s.joins.clone();

                            for expr in from.expressions.iter().skip(1) {
                                if let Expression::Table(ref t) = expr {
                                    if let Some(ref schema) = t.schema {
                                        if first_tables.contains(&schema.name.to_lowercase()) {
                                            // This is an array path reference, convert to CROSS JOIN UNNEST
                                            let col_expr = Expression::Column(
                                                crate::expressions::Column {
                                                    name: t.name.clone(),
                                                    table: Some(schema.clone()),
                                                    join_mark: false,
                                                    trailing_comments: vec![],
                                                },
                                            );
                                            let unnest_expr = Expression::Unnest(Box::new(
                                                crate::expressions::UnnestFunc {
                                                    this: col_expr,
                                                    expressions: Vec::new(),
                                                    with_ordinality: false,
                                                    alias: None,
                                                    offset_alias: None,
                                                },
                                            ));
                                            // Alias placement differs by target:
                                            // Presto-family puts the original alias in
                                            // the column-alias list of a synthetic
                                            // table alias; others alias the UNNEST itself.
                                            let join_this = if let Some(ref alias) = t.alias {
                                                if matches!(
                                                    target,
                                                    DialectType::Presto
                                                        | DialectType::Trino
                                                        | DialectType::Athena
                                                ) {
                                                    // Presto: UNNEST(x) AS _t0(results)
                                                    Expression::Alias(Box::new(
                                                        crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: Identifier::new("_t0"),
                                                            column_aliases: vec![alias.clone()],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                        },
                                                    ))
                                                } else {
                                                    // BigQuery: UNNEST(x) AS results
                                                    Expression::Alias(Box::new(
                                                        crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: alias.clone(),
                                                            column_aliases: vec![],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                        },
                                                    ))
                                                }
                                            } else {
                                                unnest_expr
                                            };
                                            new_joins.push(crate::expressions::Join {
                                                kind: crate::expressions::JoinKind::Cross,
                                                this: join_this,
                                                on: None,
                                                using: Vec::new(),
                                                use_inner_keyword: false,
                                                use_outer_keyword: false,
                                                deferred_condition: false,
                                                join_hint: None,
                                                match_condition: None,
                                                pivots: Vec::new(),
                                                comments: Vec::new(),
                                                nesting_group: 0,
                                                directed: false,
                                            });
                                        } else {
                                            new_from_exprs.push(expr.clone());
                                        }
                                    } else if t.schema.is_none() && t.name.name.contains('.') {
                                        // Dotted name in quoted identifier: `Coordinates.position`
                                        let parts: Vec<&str> = t.name.name.split('.').collect();
                                        if parts.len() >= 2
                                            && first_tables.contains(&parts[0].to_lowercase())
                                        {
                                            let join_this =
                                                if matches!(target, DialectType::BigQuery) {
                                                    // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
                                                    Expression::Table(t.clone())
                                                } else {
                                                    // Other targets: split into "schema"."name"
                                                    let mut new_t = t.clone();
                                                    new_t.schema =
                                                        Some(Identifier::quoted(parts[0]));
                                                    new_t.name = Identifier::quoted(parts[1]);
                                                    Expression::Table(new_t)
                                                };
                                            new_joins.push(crate::expressions::Join {
                                                kind: crate::expressions::JoinKind::Cross,
                                                this: join_this,
                                                on: None,
                                                using: Vec::new(),
                                                use_inner_keyword: false,
                                                use_outer_keyword: false,
                                                deferred_condition: false,
                                                join_hint: None,
                                                match_condition: None,
                                                pivots: Vec::new(),
                                                comments: Vec::new(),
                                                nesting_group: 0,
                                                directed: false,
                                            });
                                        } else {
                                            new_from_exprs.push(expr.clone());
                                        }
                                    } else {
                                        new_from_exprs.push(expr.clone());
                                    }
                                } else {
                                    // Non-table FROM items (subqueries etc.) are untouched.
                                    new_from_exprs.push(expr.clone());
                                }
                            }

                            new_select.from = Some(crate::expressions::From {
                                expressions: new_from_exprs,
                                ..from.clone()
                            });
                            new_select.joins = new_joins;
                            return Ok(Expression::Select(new_select));
                        }
                    }
                }
            }
        }
4518
        // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
        // Each qualifying CROSS JOIN is removed from `joins` and appended to
        // `select.lateral_views`; all other joins are preserved in order.
        if matches!(
            target,
            DialectType::Hive | DialectType::Spark | DialectType::Databricks
        ) {
            if let Expression::Select(ref s) = e {
                // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
                let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
                    matches!(expr, Expression::Unnest(_))
                        || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
                };
                let has_unnest_join = s.joins.iter().any(|j| {
                    j.kind == crate::expressions::JoinKind::Cross && (
                        matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
                        || is_unnest_or_explode_expr(&j.this)
                    )
                });
                if has_unnest_join {
                    let mut select = s.clone();
                    let mut new_joins = Vec::new();
                    for join in select.joins.drain(..) {
                        if join.kind == crate::expressions::JoinKind::Cross {
                            // Extract the UNNEST/EXPLODE from the join.
                            // Result triple: (function to explode, table alias,
                            // column aliases); a `None` function means the join
                            // is not convertible and is kept as-is.
                            let (func_expr, table_alias, col_aliases) = match &join.this {
                                Expression::Alias(a) => {
                                    let ta = if a.alias.is_empty() {
                                        None
                                    } else {
                                        Some(a.alias.clone())
                                    };
                                    let cas = a.column_aliases.clone();
                                    match &a.this {
                                        Expression::Unnest(u) => {
                                            // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
                                            if !u.expressions.is_empty() {
                                                let mut all_args = vec![u.this.clone()];
                                                all_args.extend(u.expressions.clone());
                                                let arrays_zip =
                                                    Expression::Function(Box::new(
                                                        crate::expressions::Function::new(
                                                            "ARRAYS_ZIP".to_string(),
                                                            all_args,
                                                        ),
                                                    ));
                                                let inline = Expression::Function(Box::new(
                                                    crate::expressions::Function::new(
                                                        "INLINE".to_string(),
                                                        vec![arrays_zip],
                                                    ),
                                                ));
                                                (Some(inline), ta, a.column_aliases.clone())
                                            } else {
                                                // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
                                                let func_name = if u.with_ordinality {
                                                    "POSEXPLODE"
                                                } else {
                                                    "EXPLODE"
                                                };
                                                let explode = Expression::Function(Box::new(
                                                    crate::expressions::Function::new(
                                                        func_name.to_string(),
                                                        vec![u.this.clone()],
                                                    ),
                                                ));
                                                // For POSEXPLODE, add 'pos' to column aliases
                                                // (position alias comes first).
                                                let cas = if u.with_ordinality {
                                                    let mut pos_aliases =
                                                        vec![Identifier::new(
                                                            "pos".to_string(),
                                                        )];
                                                    pos_aliases
                                                        .extend(a.column_aliases.clone());
                                                    pos_aliases
                                                } else {
                                                    a.column_aliases.clone()
                                                };
                                                (Some(explode), ta, cas)
                                            }
                                        }
                                        Expression::Function(f)
                                            if f.name.eq_ignore_ascii_case("EXPLODE") =>
                                        {
                                            // Already an EXPLODE call: reuse it unchanged.
                                            (Some(Expression::Function(f.clone())), ta, cas)
                                        }
                                        _ => (None, None, Vec::new()),
                                    }
                                }
                                Expression::Unnest(u) => {
                                    // Unaliased UNNEST joined directly.
                                    let func_name = if u.with_ordinality {
                                        "POSEXPLODE"
                                    } else {
                                        "EXPLODE"
                                    };
                                    let explode = Expression::Function(Box::new(
                                        crate::expressions::Function::new(
                                            func_name.to_string(),
                                            vec![u.this.clone()],
                                        ),
                                    ));
                                    let ta = u.alias.clone();
                                    let col_aliases = if u.with_ordinality {
                                        vec![Identifier::new("pos".to_string())]
                                    } else {
                                        Vec::new()
                                    };
                                    (Some(explode), ta, col_aliases)
                                }
                                _ => (None, None, Vec::new()),
                            };
                            if let Some(func) = func_expr {
                                select.lateral_views.push(crate::expressions::LateralView {
                                    this: func,
                                    table_alias,
                                    column_aliases: col_aliases,
                                    outer: false,
                                });
                            } else {
                                // CROSS JOIN without an UNNEST/EXPLODE payload: keep it.
                                new_joins.push(join);
                            }
                        } else {
                            new_joins.push(join);
                        }
                    }
                    select.joins = new_joins;
                    return Ok(Expression::Select(select));
                }
            }
        }
4647
        // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
        // for BigQuery, Presto/Trino, Snowflake
        if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
            && matches!(
                target,
                DialectType::BigQuery
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Snowflake
            )
        {
            if let Expression::Select(ref s) = e {
                // Check if any SELECT expressions contain UNNEST
                // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
                let has_unnest_in_select = s.expressions.iter().any(|expr| {
                    // Recursive probe: descends through aliases and the four basic
                    // arithmetic operators only; any other expression kind is
                    // treated as UNNEST-free.
                    fn contains_unnest(e: &Expression) -> bool {
                        match e {
                            Expression::Unnest(_) => true,
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("UNNEST") =>
                            {
                                true
                            }
                            Expression::Alias(a) => contains_unnest(&a.this),
                            Expression::Add(op)
                            | Expression::Sub(op)
                            | Expression::Mul(op)
                            | Expression::Div(op) => {
                                contains_unnest(&op.left) || contains_unnest(&op.right)
                            }
                            _ => false,
                        }
                    }
                    contains_unnest(expr)
                });

                if has_unnest_in_select {
                    // The actual expansion lives in a helper; it may decline
                    // (return None), in which case the select passes through.
                    let rewritten = Self::rewrite_unnest_expansion(s, target);
                    if let Some(new_select) = rewritten {
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
        }
4692
4693 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
4694 // BigQuery '\n' -> PostgreSQL literal newline in string
4695 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
4696 {
4697 if let Expression::Literal(Literal::String(ref s)) = e {
4698 if s.contains("\\n")
4699 || s.contains("\\t")
4700 || s.contains("\\r")
4701 || s.contains("\\\\")
4702 {
4703 let converted = s
4704 .replace("\\n", "\n")
4705 .replace("\\t", "\t")
4706 .replace("\\r", "\r")
4707 .replace("\\\\", "\\");
4708 return Ok(Expression::Literal(Literal::String(converted)));
4709 }
4710 }
4711 }
4712
        // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
        // when source != target (identity tests keep the Literal::Timestamp for native handling)
        if source != target {
            if let Expression::Literal(Literal::Timestamp(ref s)) = e {
                let s = s.clone();
                // MySQL: TIMESTAMP handling depends on source dialect
                // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
                // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
                if matches!(target, DialectType::MySQL) {
                    if matches!(source, DialectType::BigQuery) {
                        // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
                        return Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![Expression::Literal(Literal::String(s))],
                        ))));
                    } else {
                        // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
                        return Ok(Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(s)),
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })));
                    }
                }
                // All other targets: choose the CAST destination type per target,
                // with a few cases that also depend on the source dialect.
                let dt = match target {
                    DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    DialectType::Snowflake => {
                        // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
                        if matches!(source, DialectType::BigQuery) {
                            DataType::Custom {
                                name: "TIMESTAMPTZ".to_string(),
                            }
                        } else if matches!(
                            source,
                            DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake
                        ) {
                            DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }
                        } else {
                            DataType::Custom {
                                name: "TIMESTAMPNTZ".to_string(),
                            }
                        }
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
                        if matches!(source, DialectType::BigQuery) {
                            DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }
                        } else {
                            DataType::Custom {
                                name: "TIMESTAMP_NTZ".to_string(),
                            }
                        }
                    }
                    DialectType::ClickHouse => DataType::Nullable {
                        inner: Box::new(DataType::Custom {
                            name: "DateTime".to_string(),
                        }),
                    },
                    DialectType::TSQL | DialectType::Fabric => DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    DialectType::DuckDB => {
                        // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
                        // or when the timestamp string explicitly has timezone info
                        if matches!(source, DialectType::BigQuery)
                            || Self::timestamp_string_has_timezone(&s)
                        {
                            DataType::Custom {
                                name: "TIMESTAMPTZ".to_string(),
                            }
                        } else {
                            DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }
                        }
                    }
                    // Default: a plain, timezone-less TIMESTAMP cast.
                    _ => DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                };
                return Ok(Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: dt,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })));
            }
        }
4820
4821 // PostgreSQL DELETE requires explicit AS for table aliases
4822 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
4823 if let Expression::Delete(ref del) = e {
4824 if del.alias.is_some() && !del.alias_explicit_as {
4825 let mut new_del = del.clone();
4826 new_del.alias_explicit_as = true;
4827 return Ok(Expression::Delete(new_del));
4828 }
4829 }
4830 }
4831
        // UNION/INTERSECT/EXCEPT DISTINCT handling:
        // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
        // while others don't support it (Presto, Spark, DuckDB, etc.)
        {
            let needs_distinct =
                matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
            let drop_distinct = matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::DuckDB
                    | DialectType::Hive
                    | DialectType::MySQL
                    | DialectType::PostgreSQL
                    | DialectType::SQLite
                    | DialectType::TSQL
                    | DialectType::Redshift
                    | DialectType::Snowflake
                    | DialectType::Oracle
                    | DialectType::Teradata
                    | DialectType::Drill
                    | DialectType::Doris
                    | DialectType::StarRocks
            );
            match &e {
                // Add DISTINCT when the target requires it and the operation is
                // neither ALL nor already marked DISTINCT.
                Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = true;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = true;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = true;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                // Drop an explicit DISTINCT for targets that don't accept the keyword.
                Expression::Union(u) if u.distinct && drop_distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = false;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if i.distinct && drop_distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = false;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if ex.distinct && drop_distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = false;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                _ => {}
            }
        }
4893
4894 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
4895 if matches!(target, DialectType::ClickHouse) {
4896 if let Expression::Function(ref f) = e {
4897 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
4898 let mut new_f = f.as_ref().clone();
4899 new_f.name = "map".to_string();
4900 return Ok(Expression::Function(Box::new(new_f)));
4901 }
4902 }
4903 }
4904
4905 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
4906 if matches!(target, DialectType::ClickHouse) {
4907 if let Expression::Intersect(ref i) = e {
4908 if i.all {
4909 let mut new_i = (**i).clone();
4910 new_i.all = false;
4911 return Ok(Expression::Intersect(Box::new(new_i)));
4912 }
4913 }
4914 }
4915
4916 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
4917 // Only from Generic source, to prevent double-wrapping
4918 if matches!(source, DialectType::Generic) {
4919 if let Expression::Div(ref op) = e {
4920 let cast_type = match target {
4921 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
4922 precision: None,
4923 scale: None,
4924 real_spelling: false,
4925 }),
4926 DialectType::Drill
4927 | DialectType::Trino
4928 | DialectType::Athena
4929 | DialectType::Presto => Some(DataType::Double {
4930 precision: None,
4931 scale: None,
4932 }),
4933 DialectType::PostgreSQL
4934 | DialectType::Redshift
4935 | DialectType::Materialize
4936 | DialectType::Teradata
4937 | DialectType::RisingWave => Some(DataType::Double {
4938 precision: None,
4939 scale: None,
4940 }),
4941 _ => None,
4942 };
4943 if let Some(dt) = cast_type {
4944 let cast_left = Expression::Cast(Box::new(Cast {
4945 this: op.left.clone(),
4946 to: dt,
4947 double_colon_syntax: false,
4948 trailing_comments: Vec::new(),
4949 format: None,
4950 default: None,
4951 }));
4952 let new_op = crate::expressions::BinaryOp {
4953 left: cast_left,
4954 right: op.right.clone(),
4955 left_comments: op.left_comments.clone(),
4956 operator_comments: op.operator_comments.clone(),
4957 trailing_comments: op.trailing_comments.clone(),
4958 };
4959 return Ok(Expression::Div(Box::new(new_op)));
4960 }
4961 }
4962 }
4963
        // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
        // (DROP DATABASE is likewise mapped to DROP SCHEMA; the
        // IF [NOT] EXISTS flags are carried over in both directions).
        if matches!(target, DialectType::DuckDB) {
            if let Expression::CreateDatabase(db) = e {
                let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
                schema.if_not_exists = db.if_not_exists;
                return Ok(Expression::CreateSchema(Box::new(schema)));
            }
            if let Expression::DropDatabase(db) = e {
                let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
                schema.if_exists = db.if_exists;
                return Ok(Expression::DropSchema(Box::new(schema)));
            }
        }
4977
        // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
        if matches!(source, DialectType::ClickHouse)
            && !matches!(target, DialectType::ClickHouse)
        {
            if let Expression::Cast(ref c) = e {
                if let DataType::Custom { ref name } = c.to {
                    let upper = name.to_uppercase();
                    if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
                        // Byte slice is safe here: for `upper` to start with
                        // "NULLABLE(" the first 9 chars of `name` must be ASCII
                        // (only ASCII letters uppercase to N/U/L/A/B/E), and the
                        // trailing ')' is a single ASCII byte.
                        let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
                        let inner_upper = inner.to_uppercase();
                        // Map well-known ClickHouse inner type names onto portable
                        // DataType variants; anything unrecognized passes through
                        // as a custom type with its original spelling.
                        let new_dt = match inner_upper.as_str() {
                            "DATETIME" | "DATETIME64" => DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            "DATE" => DataType::Date,
                            "INT64" | "BIGINT" => DataType::BigInt { length: None },
                            "INT32" | "INT" | "INTEGER" => DataType::Int {
                                length: None,
                                integer_spelling: false,
                            },
                            "FLOAT64" | "DOUBLE" => DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            "STRING" => DataType::Text,
                            _ => DataType::Custom {
                                name: inner.to_string(),
                            },
                        };
                        let mut new_cast = c.clone();
                        new_cast.to = new_dt;
                        return Ok(Expression::Cast(new_cast));
                    }
                }
            }
        }
5015
5016 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5017 if matches!(target, DialectType::Snowflake) {
5018 if let Expression::ArrayConcatAgg(ref agg) = e {
5019 let mut agg_clone = agg.as_ref().clone();
5020 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5021 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5022 let flatten = Expression::Function(Box::new(Function::new(
5023 "ARRAY_FLATTEN".to_string(),
5024 vec![array_agg],
5025 )));
5026 return Ok(flatten);
5027 }
5028 }
5029
5030 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5031 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5032 if let Expression::ArrayConcatAgg(agg) = e {
5033 let arg = agg.this;
5034 return Ok(Expression::Function(Box::new(Function::new(
5035 "ARRAY_CONCAT_AGG".to_string(),
5036 vec![arg],
5037 ))));
5038 }
5039 }
5040
5041 // Determine what action to take by inspecting e immutably
5042 let action = {
5043 let source_propagates_nulls =
5044 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5045 let target_ignores_nulls =
5046 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5047
5048 match &e {
5049 Expression::Function(f) => {
5050 let name = f.name.to_uppercase();
5051 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5052 if (name == "DATE_PART" || name == "DATEPART")
5053 && f.args.len() == 2
5054 && matches!(target, DialectType::Snowflake)
5055 && !matches!(source, DialectType::Snowflake)
5056 && matches!(
5057 &f.args[0],
5058 Expression::Literal(crate::expressions::Literal::String(_))
5059 )
5060 {
5061 Action::DatePartUnquote
5062 } else if source_propagates_nulls
5063 && target_ignores_nulls
5064 && (name == "GREATEST" || name == "LEAST")
5065 && f.args.len() >= 2
5066 {
5067 Action::GreatestLeastNull
5068 } else if matches!(source, DialectType::Snowflake)
5069 && name == "ARRAY_GENERATE_RANGE"
5070 && f.args.len() >= 2
5071 {
5072 Action::ArrayGenerateRange
5073 } else if matches!(source, DialectType::Snowflake)
5074 && matches!(target, DialectType::DuckDB)
5075 && name == "DATE_TRUNC"
5076 && f.args.len() == 2
5077 {
5078 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5079 // Logic based on Python sqlglot's input_type_preserved flag:
5080 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5081 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5082 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5083 let unit_str = match &f.args[0] {
5084 Expression::Literal(crate::expressions::Literal::String(s)) => {
5085 Some(s.to_uppercase())
5086 }
5087 _ => None,
5088 };
5089 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5090 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5091 });
5092 match &f.args[1] {
5093 Expression::Cast(c) => match &c.to {
5094 DataType::Time { .. } => Action::DateTruncWrapCast,
5095 DataType::Custom { name }
5096 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5097 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5098 {
5099 Action::DateTruncWrapCast
5100 }
5101 DataType::Timestamp { timezone: true, .. } => {
5102 Action::DateTruncWrapCast
5103 }
5104 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5105 DataType::Timestamp {
5106 timezone: false, ..
5107 } if is_date_unit => Action::DateTruncWrapCast,
5108 _ => Action::None,
5109 },
5110 _ => Action::None,
5111 }
5112 } else if matches!(source, DialectType::Snowflake)
5113 && matches!(target, DialectType::DuckDB)
5114 && name == "TO_DATE"
5115 && f.args.len() == 1
5116 && !matches!(
5117 &f.args[0],
5118 Expression::Literal(crate::expressions::Literal::String(_))
5119 )
5120 {
5121 Action::ToDateToCast
5122 } else if !matches!(source, DialectType::Redshift)
5123 && matches!(target, DialectType::Redshift)
5124 && name == "CONVERT_TIMEZONE"
5125 && (f.args.len() == 2 || f.args.len() == 3)
5126 {
5127 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5128 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5129 // The Redshift parser adds 'UTC' as default source_tz, but when
5130 // transpiling from other dialects, we should preserve the original form.
5131 Action::ConvertTimezoneToExpr
5132 } else if matches!(source, DialectType::Snowflake)
5133 && matches!(target, DialectType::DuckDB)
5134 && name == "REGEXP_REPLACE"
5135 && f.args.len() == 4
5136 && !matches!(
5137 &f.args[3],
5138 Expression::Literal(crate::expressions::Literal::String(_))
5139 )
5140 {
5141 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5142 Action::RegexpReplaceSnowflakeToDuckDB
5143 } else if name == "_BQ_TO_HEX" {
5144 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5145 Action::BigQueryToHexBare
5146 } else if matches!(source, DialectType::BigQuery)
5147 && !matches!(target, DialectType::BigQuery)
5148 {
5149 // BigQuery-specific functions that need to be converted to standard forms
5150 match name.as_str() {
5151 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5152 | "DATE_DIFF"
5153 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5154 | "DATETIME_ADD" | "DATETIME_SUB"
5155 | "TIME_ADD" | "TIME_SUB"
5156 | "DATE_ADD" | "DATE_SUB"
5157 | "SAFE_DIVIDE"
5158 | "GENERATE_UUID"
5159 | "COUNTIF"
5160 | "EDIT_DISTANCE"
5161 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5162 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5163 | "TO_HEX"
5164 | "TO_JSON_STRING"
5165 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5166 | "DIV"
5167 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5168 | "LAST_DAY"
5169 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5170 | "REGEXP_CONTAINS"
5171 | "CONTAINS_SUBSTR"
5172 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5173 | "SAFE_CAST"
5174 | "GENERATE_DATE_ARRAY"
5175 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5176 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5177 | "ARRAY_CONCAT"
5178 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5179 | "INSTR"
5180 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5181 | "GENERATE_UUID()" // just in case
5182 | "REGEXP_EXTRACT_ALL"
5183 | "REGEXP_EXTRACT"
5184 | "INT64"
5185 | "ARRAY_CONCAT_AGG"
5186 | "DATE_DIFF(" // just in case
5187 | "TO_HEX_MD5" // internal
5188 | "MOD"
5189 | "CONCAT"
5190 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5191 | "STRUCT"
5192 | "ROUND"
5193 | "MAKE_INTERVAL"
5194 | "ARRAY_TO_STRING"
5195 | "PERCENTILE_CONT"
5196 => Action::BigQueryFunctionNormalize,
5197 "ARRAY" if matches!(target, DialectType::Snowflake)
5198 && f.args.len() == 1
5199 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5200 => Action::BigQueryArraySelectAsStructToSnowflake,
5201 _ => Action::None,
5202 }
5203 } else if matches!(source, DialectType::BigQuery)
5204 && matches!(target, DialectType::BigQuery)
5205 {
5206 // BigQuery -> BigQuery normalizations
5207 match name.as_str() {
5208 "TIMESTAMP_DIFF"
5209 | "DATETIME_DIFF"
5210 | "TIME_DIFF"
5211 | "DATE_DIFF"
5212 | "DATE_ADD"
5213 | "TO_HEX"
5214 | "CURRENT_TIMESTAMP"
5215 | "CURRENT_DATE"
5216 | "CURRENT_TIME"
5217 | "CURRENT_DATETIME"
5218 | "GENERATE_DATE_ARRAY"
5219 | "INSTR"
5220 | "FORMAT_DATETIME"
5221 | "DATETIME"
5222 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5223 _ => Action::None,
5224 }
5225 } else {
5226 // Generic function normalization for non-BigQuery sources
5227 match name.as_str() {
5228 "ARBITRARY" | "AGGREGATE"
5229 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5230 | "STRUCT_EXTRACT"
5231 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5232 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5233 | "SUBSTRINGINDEX"
5234 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5235 | "UNICODE"
5236 | "XOR"
5237 | "ARRAY_REVERSE_SORT"
5238 | "ENCODE" | "DECODE"
5239 | "QUANTILE"
5240 | "EPOCH" | "EPOCH_MS"
5241 | "HASHBYTES"
5242 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5243 | "APPROX_DISTINCT"
5244 | "DATE_PARSE" | "FORMAT_DATETIME"
5245 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5246 | "RLIKE"
5247 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5248 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5249 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5250 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5251 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5252 | "MAP" | "MAP_FROM_ENTRIES"
5253 | "COLLECT_LIST" | "COLLECT_SET"
5254 | "ISNAN" | "IS_NAN"
5255 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5256 | "FORMAT_NUMBER"
5257 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5258 | "ELEMENT_AT"
5259 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5260 | "SPLIT_PART"
5261 // GENERATE_SERIES: handled separately below
5262 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5263 | "JSON_QUERY" | "JSON_VALUE"
5264 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5265 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5266 | "CURDATE" | "CURTIME"
5267 | "ARRAY_TO_STRING"
5268 | "ARRAY_SORT" | "SORT_ARRAY"
5269 | "LEFT" | "RIGHT"
5270 | "MAP_FROM_ARRAYS"
5271 | "LIKE" | "ILIKE"
5272 | "ARRAY_CONCAT" | "LIST_CONCAT"
5273 | "QUANTILE_CONT" | "QUANTILE_DISC"
5274 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5275 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5276 | "LOCATE" | "STRPOS" | "INSTR"
5277 | "CHAR"
5278 // CONCAT: handled separately for COALESCE wrapping
5279 | "ARRAY_JOIN"
5280 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5281 | "ISNULL"
5282 | "MONTHNAME"
5283 | "TO_TIMESTAMP"
5284 | "TO_DATE"
5285 | "TO_JSON"
5286 | "REGEXP_SPLIT"
5287 | "SPLIT"
5288 | "FORMATDATETIME"
5289 | "ARRAYJOIN"
5290 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5291 | "NVL"
5292 | "TO_CHAR"
5293 | "DBMS_RANDOM.VALUE"
5294 | "REGEXP_LIKE"
5295 | "REPLICATE"
5296 | "LEN"
5297 | "COUNT_BIG"
5298 | "DATEFROMPARTS"
5299 | "DATETIMEFROMPARTS"
5300 | "CONVERT" | "TRY_CONVERT"
5301 | "STRFTIME" | "STRPTIME"
5302 | "DATE_FORMAT" | "FORMAT_DATE"
5303 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5304 | "FROM_BASE64" | "TO_BASE64"
5305 | "GETDATE"
5306 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5307 | "TO_UTF8" | "FROM_UTF8"
5308 | "STARTS_WITH" | "STARTSWITH"
5309 | "APPROX_COUNT_DISTINCT"
5310 | "JSON_FORMAT"
5311 | "SYSDATE"
5312 | "LOGICAL_OR" | "LOGICAL_AND"
5313 | "MONTHS_ADD"
5314 | "SCHEMA_NAME"
5315 | "STRTOL"
5316 | "EDITDIST3"
5317 | "FORMAT"
5318 | "LIST_CONTAINS" | "LIST_HAS"
5319 | "VARIANCE" | "STDDEV"
5320 | "ISINF"
5321 | "TO_UNIXTIME"
5322 | "FROM_UNIXTIME"
5323 | "DATEPART" | "DATE_PART"
5324 | "DATENAME"
5325 | "STRING_AGG"
5326 | "JSON_ARRAYAGG"
5327 | "APPROX_QUANTILE"
5328 | "MAKE_DATE"
5329 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5330 | "RANGE"
5331 | "TRY_ELEMENT_AT"
5332 | "STR_TO_MAP"
5333 | "STRING"
5334 | "STR_TO_TIME"
5335 | "CURRENT_SCHEMA"
5336 | "LTRIM" | "RTRIM"
5337 | "UUID"
5338 | "FARM_FINGERPRINT"
5339 | "JSON_KEYS"
5340 | "WEEKOFYEAR"
5341 | "CONCAT_WS"
5342 | "ARRAY_SLICE"
5343 | "ARRAY_PREPEND"
5344 | "ARRAY_REMOVE"
5345 | "GENERATE_DATE_ARRAY"
5346 | "PARSE_JSON"
5347 | "JSON_REMOVE"
5348 | "JSON_SET"
5349 | "LEVENSHTEIN"
5350 => Action::GenericFunctionNormalize,
5351 // Canonical date functions -> dialect-specific
5352 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5353 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5354 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5355 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5356 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5357 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5358 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5359 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5360 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5361 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5362 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5363 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5364 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5365 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5366 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5367 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5368 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5369 // STR_TO_DATE(x, fmt) -> dialect-specific
5370 "STR_TO_DATE" if f.args.len() == 2
5371 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5372 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5373 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5374 "TS_OR_DS_ADD" if f.args.len() == 3
5375 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5376 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5377 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5378 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5379 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5380 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5381 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5382 // IS_ASCII(x) -> dialect-specific
5383 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5384 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5385 "STR_POSITION" => Action::StrPositionConvert,
5386 // ARRAY_SUM -> dialect-specific
5387 "ARRAY_SUM" => Action::ArraySumConvert,
5388 // ARRAY_SIZE -> dialect-specific (Drill only)
5389 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5390 // ARRAY_ANY -> dialect-specific
5391 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5392 // Functions needing specific cross-dialect transforms
5393 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5394 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5395 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5396 "ARRAY" if matches!(source, DialectType::BigQuery)
5397 && matches!(target, DialectType::Snowflake)
5398 && f.args.len() == 1
5399 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5400 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5401 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5402 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5403 "DATE_TRUNC" if f.args.len() == 2
5404 && matches!(source, DialectType::Generic)
5405 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5406 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5407 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5408 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5409 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5410 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5411 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5412 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5413 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5414 // GENERATE_SERIES with interval normalization for PG target
5415 "GENERATE_SERIES" if f.args.len() >= 3
5416 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5417 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5418 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5419 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5420 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5421 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5422 "CONCAT" => Action::GenericFunctionNormalize,
5423 // DIV(a, b) -> target-specific integer division
5424 "DIV" if f.args.len() == 2
5425 && matches!(source, DialectType::PostgreSQL)
5426 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5427 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5428 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5429 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5430 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5431 "JSONB_EXISTS" if f.args.len() == 2
5432 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5433 // DATE_BIN -> TIME_BUCKET for DuckDB
5434 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5435 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5436 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5437 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5438 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5439 // ClickHouse any -> ANY_VALUE for other dialects
5440 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5441 _ => Action::None,
5442 }
5443 }
5444 }
5445 Expression::AggregateFunction(af) => {
5446 let name = af.name.to_uppercase();
5447 match name.as_str() {
5448 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5449 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5450 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5451 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5452 if matches!(target, DialectType::DuckDB) =>
5453 {
5454 Action::JsonObjectAggConvert
5455 }
5456 "ARRAY_AGG"
5457 if matches!(
5458 target,
5459 DialectType::Hive
5460 | DialectType::Spark
5461 | DialectType::Databricks
5462 ) =>
5463 {
5464 Action::ArrayAggToCollectList
5465 }
5466 "MAX_BY" | "MIN_BY"
5467 if matches!(
5468 target,
5469 DialectType::ClickHouse
5470 | DialectType::Spark
5471 | DialectType::Databricks
5472 | DialectType::DuckDB
5473 ) =>
5474 {
5475 Action::MaxByMinByConvert
5476 }
5477 "COLLECT_LIST"
5478 if matches!(
5479 target,
5480 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
5481 ) =>
5482 {
5483 Action::CollectListToArrayAgg
5484 }
5485 "COLLECT_SET"
5486 if matches!(
5487 target,
5488 DialectType::Presto
5489 | DialectType::Trino
5490 | DialectType::Snowflake
5491 | DialectType::DuckDB
5492 ) =>
5493 {
5494 Action::CollectSetConvert
5495 }
5496 "PERCENTILE"
5497 if matches!(
5498 target,
5499 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5500 ) =>
5501 {
5502 Action::PercentileConvert
5503 }
5504 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
5505 "CORR"
5506 if matches!(target, DialectType::DuckDB)
5507 && matches!(source, DialectType::Snowflake) =>
5508 {
5509 Action::CorrIsnanWrap
5510 }
5511 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5512 "APPROX_QUANTILES"
5513 if matches!(source, DialectType::BigQuery)
5514 && matches!(target, DialectType::DuckDB) =>
5515 {
5516 Action::BigQueryApproxQuantiles
5517 }
5518 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5519 "PERCENTILE_CONT"
5520 if matches!(source, DialectType::BigQuery)
5521 && matches!(target, DialectType::DuckDB)
5522 && af.args.len() >= 2 =>
5523 {
5524 Action::BigQueryPercentileContToDuckDB
5525 }
5526 _ => Action::None,
5527 }
5528 }
5529 Expression::JSONArrayAgg(_) => match target {
5530 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5531 _ => Action::None,
5532 },
5533 Expression::ToNumber(tn) => {
5534 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5535 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5536 match target {
5537 DialectType::Oracle
5538 | DialectType::Snowflake
5539 | DialectType::Teradata => Action::None,
5540 _ => Action::GenericFunctionNormalize,
5541 }
5542 } else {
5543 Action::None
5544 }
5545 }
5546 Expression::Nvl2(_) => {
5547 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5548 // Keep as NVL2 for dialects that support it natively
5549 match target {
5550 DialectType::Oracle
5551 | DialectType::Snowflake
5552 | DialectType::Teradata
5553 | DialectType::Spark
5554 | DialectType::Databricks
5555 | DialectType::Redshift => Action::None,
5556 _ => Action::Nvl2Expand,
5557 }
5558 }
5559 Expression::Decode(_) | Expression::DecodeCase(_) => {
5560 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5561 // Keep as DECODE for Oracle/Snowflake
5562 match target {
5563 DialectType::Oracle | DialectType::Snowflake => Action::None,
5564 _ => Action::DecodeSimplify,
5565 }
5566 }
5567 Expression::Coalesce(ref cf) => {
5568 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5569 // BigQuery keeps IFNULL natively when source is also BigQuery
5570 if cf.original_name.as_deref() == Some("IFNULL")
5571 && !(matches!(source, DialectType::BigQuery)
5572 && matches!(target, DialectType::BigQuery))
5573 {
5574 Action::IfnullToCoalesce
5575 } else {
5576 Action::None
5577 }
5578 }
5579 Expression::IfFunc(if_func) => {
5580 if matches!(source, DialectType::Snowflake)
5581 && matches!(
5582 target,
5583 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5584 )
5585 && matches!(if_func.false_value, Some(Expression::Div(_)))
5586 {
5587 Action::Div0TypedDivision
5588 } else {
5589 Action::None
5590 }
5591 }
5592 Expression::ToJson(_) => match target {
5593 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5594 DialectType::BigQuery => Action::ToJsonConvert,
5595 DialectType::DuckDB => Action::ToJsonConvert,
5596 _ => Action::None,
5597 },
5598 Expression::ArrayAgg(ref agg) => {
5599 if matches!(
5600 target,
5601 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5602 ) {
5603 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5604 Action::ArrayAggToCollectList
5605 } else if matches!(
5606 source,
5607 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5608 ) && matches!(target, DialectType::DuckDB)
5609 && agg.filter.is_some()
5610 {
5611 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5612 // Need to add NOT x IS NULL to existing filter
5613 Action::ArrayAggNullFilter
5614 } else if matches!(target, DialectType::DuckDB)
5615 && agg.ignore_nulls == Some(true)
5616 && !agg.order_by.is_empty()
5617 {
5618 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5619 Action::ArrayAggIgnoreNullsDuckDB
5620 } else if !matches!(source, DialectType::Snowflake) {
5621 Action::None
5622 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5623 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5624 == Some("ARRAY_AGG".to_string())
5625 || agg.name.is_none();
5626 if is_array_agg {
5627 Action::ArrayAggCollectList
5628 } else {
5629 Action::None
5630 }
5631 } else if matches!(
5632 target,
5633 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5634 ) && agg.filter.is_none()
5635 {
5636 Action::ArrayAggFilter
5637 } else {
5638 Action::None
5639 }
5640 }
5641 Expression::WithinGroup(wg) => {
5642 if matches!(source, DialectType::Snowflake)
5643 && matches!(
5644 target,
5645 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5646 )
5647 && matches!(wg.this, Expression::ArrayAgg(_))
5648 {
5649 Action::ArrayAggWithinGroupFilter
5650 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
5651 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
5652 || matches!(&wg.this, Expression::StringAgg(_))
5653 {
5654 Action::StringAggConvert
5655 } else if matches!(
5656 target,
5657 DialectType::Presto
5658 | DialectType::Trino
5659 | DialectType::Athena
5660 | DialectType::Spark
5661 | DialectType::Databricks
5662 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5663 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5664 || matches!(&wg.this, Expression::PercentileCont(_)))
5665 {
5666 Action::PercentileContConvert
5667 } else {
5668 Action::None
5669 }
5670 }
5671 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5672 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
5673 // DATETIME is the timezone-unaware type
5674 Expression::Cast(ref c) => {
5675 if c.format.is_some()
5676 && (matches!(source, DialectType::BigQuery)
5677 || matches!(source, DialectType::Teradata))
5678 {
5679 Action::BigQueryCastFormat
5680 } else if matches!(target, DialectType::BigQuery)
5681 && !matches!(source, DialectType::BigQuery)
5682 && matches!(
5683 c.to,
5684 DataType::Timestamp {
5685 timezone: false,
5686 ..
5687 }
5688 )
5689 {
5690 Action::CastTimestampToDatetime
5691 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
5692 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
5693 && matches!(
5694 c.to,
5695 DataType::Timestamp {
5696 timezone: false,
5697 ..
5698 }
5699 )
5700 {
5701 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5702 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
5703 Action::CastTimestampToDatetime
5704 } else if matches!(
5705 source,
5706 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5707 ) && matches!(
5708 target,
5709 DialectType::Presto
5710 | DialectType::Trino
5711 | DialectType::Athena
5712 | DialectType::DuckDB
5713 | DialectType::Snowflake
5714 | DialectType::BigQuery
5715 | DialectType::Databricks
5716 | DialectType::TSQL
5717 ) {
5718 Action::HiveCastToTryCast
5719 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5720 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
5721 {
5722 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
5723 Action::CastTimestamptzToFunc
5724 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5725 && matches!(
5726 target,
5727 DialectType::Hive
5728 | DialectType::Spark
5729 | DialectType::Databricks
5730 | DialectType::BigQuery
5731 )
5732 {
5733 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
5734 Action::CastTimestampStripTz
5735 } else if matches!(&c.to, DataType::Json)
5736 && matches!(&c.this, Expression::Literal(Literal::String(_)))
5737 && matches!(
5738 target,
5739 DialectType::Presto
5740 | DialectType::Trino
5741 | DialectType::Athena
5742 | DialectType::Snowflake
5743 )
5744 {
5745 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
5746 // Only when the input is a string literal (JSON 'value' syntax)
5747 Action::JsonLiteralToJsonParse
5748 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
5749 && matches!(target, DialectType::Spark | DialectType::Databricks)
5750 {
5751 // CAST(x AS JSON) -> TO_JSON(x) for Spark
5752 Action::CastToJsonForSpark
5753 } else if (matches!(
5754 &c.to,
5755 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
5756 )) && matches!(
5757 target,
5758 DialectType::Spark | DialectType::Databricks
5759 ) && (matches!(&c.this, Expression::ParseJson(_))
5760 || matches!(
5761 &c.this,
5762 Expression::Function(f)
5763 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
5764 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
5765 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
5766 ))
5767 {
5768 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
5769 // -> FROM_JSON(..., type_string) for Spark
5770 Action::CastJsonToFromJson
5771 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
5772 && matches!(
5773 c.to,
5774 DataType::Timestamp {
5775 timezone: false,
5776 ..
5777 }
5778 )
5779 && matches!(source, DialectType::DuckDB)
5780 {
5781 Action::StrftimeCastTimestamp
5782 } else if matches!(source, DialectType::DuckDB)
5783 && matches!(
5784 c.to,
5785 DataType::Decimal {
5786 precision: None,
5787 ..
5788 }
5789 )
5790 {
5791 Action::DecimalDefaultPrecision
5792 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
5793 && matches!(c.to, DataType::Char { length: None })
5794 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
5795 {
5796 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
5797 Action::MysqlCastCharToText
5798 } else if matches!(
5799 source,
5800 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5801 ) && matches!(
5802 target,
5803 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5804 ) && Self::has_varchar_char_type(&c.to)
5805 {
5806 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
5807 Action::SparkCastVarcharToString
5808 } else {
5809 Action::None
5810 }
5811 }
5812 Expression::SafeCast(ref c) => {
5813 if c.format.is_some()
5814 && matches!(source, DialectType::BigQuery)
5815 && !matches!(target, DialectType::BigQuery)
5816 {
5817 Action::BigQueryCastFormat
5818 } else {
5819 Action::None
5820 }
5821 }
5822 // For DuckDB: DATE_TRUNC should preserve the input type
5823 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
5824 if matches!(source, DialectType::Snowflake)
5825 && matches!(target, DialectType::DuckDB)
5826 {
5827 Action::DateTruncWrapCast
5828 } else {
5829 Action::None
5830 }
5831 }
5832 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
5833 Expression::SetStatement(s) => {
5834 if matches!(target, DialectType::DuckDB)
5835 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
5836 && s.items.iter().any(|item| item.kind.is_none())
5837 {
5838 Action::SetToVariable
5839 } else {
5840 Action::None
5841 }
5842 }
5843 // Cross-dialect NULL ordering normalization.
5844 // When nulls_first is not specified, fill in the source dialect's implied
5845 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
5846 Expression::Ordered(o) => {
5847 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
5848 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
5849 Action::MysqlNullsOrdering
5850 } else {
5851 // Skip targets that don't support NULLS FIRST/LAST syntax
5852 let target_supports_nulls = !matches!(
5853 target,
5854 DialectType::MySQL
5855 | DialectType::TSQL
5856 | DialectType::StarRocks
5857 | DialectType::Doris
5858 );
5859 if o.nulls_first.is_none() && source != target && target_supports_nulls
5860 {
5861 Action::NullsOrdering
5862 } else {
5863 Action::None
5864 }
5865 }
5866 }
5867 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
5868 Expression::DataType(dt) => {
5869 if matches!(source, DialectType::BigQuery)
5870 && !matches!(target, DialectType::BigQuery)
5871 {
5872 match dt {
5873 DataType::Custom { ref name }
5874 if name.eq_ignore_ascii_case("INT64")
5875 || name.eq_ignore_ascii_case("FLOAT64")
5876 || name.eq_ignore_ascii_case("BOOL")
5877 || name.eq_ignore_ascii_case("BYTES")
5878 || name.eq_ignore_ascii_case("NUMERIC")
5879 || name.eq_ignore_ascii_case("STRING")
5880 || name.eq_ignore_ascii_case("DATETIME") =>
5881 {
5882 Action::BigQueryCastType
5883 }
5884 _ => Action::None,
5885 }
5886 } else if matches!(source, DialectType::TSQL) {
5887 // For TSQL source -> any target (including TSQL itself for REAL)
5888 match dt {
5889 // REAL -> FLOAT even for TSQL->TSQL
5890 DataType::Custom { ref name }
5891 if name.eq_ignore_ascii_case("REAL") =>
5892 {
5893 Action::TSQLTypeNormalize
5894 }
5895 DataType::Float {
5896 real_spelling: true,
5897 ..
5898 } => Action::TSQLTypeNormalize,
5899 // Other TSQL type normalizations only for non-TSQL targets
5900 DataType::Custom { ref name }
5901 if !matches!(target, DialectType::TSQL)
5902 && (name.eq_ignore_ascii_case("MONEY")
5903 || name.eq_ignore_ascii_case("SMALLMONEY")
5904 || name.eq_ignore_ascii_case("DATETIME2")
5905 || name.eq_ignore_ascii_case("IMAGE")
5906 || name.eq_ignore_ascii_case("BIT")
5907 || name.eq_ignore_ascii_case("ROWVERSION")
5908 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
5909 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
5910 || name.to_uppercase().starts_with("NUMERIC")
5911 || name.to_uppercase().starts_with("DATETIME2(")
5912 || name.to_uppercase().starts_with("TIME(")) =>
5913 {
5914 Action::TSQLTypeNormalize
5915 }
5916 DataType::Float {
5917 precision: Some(_), ..
5918 } if !matches!(target, DialectType::TSQL) => {
5919 Action::TSQLTypeNormalize
5920 }
5921 DataType::TinyInt { .. }
5922 if !matches!(target, DialectType::TSQL) =>
5923 {
5924 Action::TSQLTypeNormalize
5925 }
5926 // INTEGER -> INT for Databricks/Spark targets
5927 DataType::Int {
5928 integer_spelling: true,
5929 ..
5930 } if matches!(
5931 target,
5932 DialectType::Databricks | DialectType::Spark
5933 ) =>
5934 {
5935 Action::TSQLTypeNormalize
5936 }
5937 _ => Action::None,
5938 }
5939 } else if (matches!(source, DialectType::Oracle)
5940 || matches!(source, DialectType::Generic))
5941 && !matches!(target, DialectType::Oracle)
5942 {
5943 match dt {
5944 DataType::Custom { ref name }
5945 if name.to_uppercase().starts_with("VARCHAR2(")
5946 || name.to_uppercase().starts_with("NVARCHAR2(")
5947 || name.eq_ignore_ascii_case("VARCHAR2")
5948 || name.eq_ignore_ascii_case("NVARCHAR2") =>
5949 {
5950 Action::OracleVarchar2ToVarchar
5951 }
5952 _ => Action::None,
5953 }
5954 } else if matches!(target, DialectType::Snowflake)
5955 && !matches!(source, DialectType::Snowflake)
5956 {
5957 // When target is Snowflake but source is NOT Snowflake,
5958 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
5959 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
5960 // should keep their FLOAT spelling.
5961 match dt {
5962 DataType::Float { .. } => Action::SnowflakeFloatProtect,
5963 _ => Action::None,
5964 }
5965 } else {
5966 Action::None
5967 }
5968 }
5969 // LOWER patterns from BigQuery TO_HEX conversions:
5970 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
5971 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
5972 Expression::Lower(uf) => {
5973 if matches!(source, DialectType::BigQuery) {
5974 match &uf.this {
5975 Expression::Lower(_) => Action::BigQueryToHexLower,
5976 Expression::Function(f)
5977 if f.name == "TO_HEX"
5978 && matches!(target, DialectType::BigQuery) =>
5979 {
5980 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
5981 Action::BigQueryToHexLower
5982 }
5983 _ => Action::None,
5984 }
5985 } else {
5986 Action::None
5987 }
5988 }
5989 // UPPER patterns from BigQuery TO_HEX conversions:
5990 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
5991 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
5992 Expression::Upper(uf) => {
5993 if matches!(source, DialectType::BigQuery) {
5994 match &uf.this {
5995 Expression::Lower(_) => Action::BigQueryToHexUpper,
5996 _ => Action::None,
5997 }
5998 } else {
5999 Action::None
6000 }
6001 }
6002 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6003 // Snowflake supports LAST_DAY with unit, so keep it there
6004 Expression::LastDay(ld) => {
6005 if matches!(source, DialectType::BigQuery)
6006 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6007 && ld.unit.is_some()
6008 {
6009 Action::BigQueryLastDayStripUnit
6010 } else {
6011 Action::None
6012 }
6013 }
6014 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6015 Expression::SafeDivide(_) => {
6016 if matches!(source, DialectType::BigQuery)
6017 && !matches!(target, DialectType::BigQuery)
6018 {
6019 Action::BigQuerySafeDivide
6020 } else {
6021 Action::None
6022 }
6023 }
6024 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6025 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6026 Expression::AnyValue(ref agg) => {
6027 if matches!(source, DialectType::BigQuery)
6028 && matches!(target, DialectType::DuckDB)
6029 && agg.having_max.is_some()
6030 {
6031 Action::BigQueryAnyValueHaving
6032 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6033 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6034 && agg.ignore_nulls.is_none()
6035 {
6036 Action::AnyValueIgnoreNulls
6037 } else {
6038 Action::None
6039 }
6040 }
6041 Expression::Any(ref q) => {
6042 if matches!(source, DialectType::PostgreSQL)
6043 && matches!(
6044 target,
6045 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6046 )
6047 && q.op.is_some()
6048 && !matches!(
6049 q.subquery,
6050 Expression::Select(_) | Expression::Subquery(_)
6051 )
6052 {
6053 Action::AnyToExists
6054 } else {
6055 Action::None
6056 }
6057 }
6058 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6059 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6060 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6061 Expression::RegexpLike(_)
6062 if !matches!(source, DialectType::DuckDB)
6063 && matches!(target, DialectType::DuckDB) =>
6064 {
6065 Action::RegexpLikeToDuckDB
6066 }
6067 // MySQL division -> NULLIF wrapping and/or CAST for specific targets
6068 Expression::Div(ref op)
6069 if matches!(source, DialectType::MySQL)
6070 && matches!(
6071 target,
6072 DialectType::PostgreSQL
6073 | DialectType::Redshift
6074 | DialectType::Drill
6075 | DialectType::Trino
6076 | DialectType::Presto
6077 | DialectType::TSQL
6078 | DialectType::Teradata
6079 | DialectType::SQLite
6080 | DialectType::BigQuery
6081 | DialectType::Snowflake
6082 | DialectType::Databricks
6083 | DialectType::Oracle
6084 ) =>
6085 {
6086 // Only wrap if RHS is not already NULLIF
6087 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6088 {
6089 Action::MySQLSafeDivide
6090 } else {
6091 Action::None
6092 }
6093 }
6094 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6095 // For TSQL/Fabric, convert to sp_rename instead
6096 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6097 if let Some(crate::expressions::AlterTableAction::RenameTable(
6098 ref new_tbl,
6099 )) = at.actions.first()
6100 {
6101 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6102 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6103 Action::AlterTableToSpRename
6104 } else if new_tbl.schema.is_some()
6105 && matches!(
6106 target,
6107 DialectType::BigQuery
6108 | DialectType::Doris
6109 | DialectType::StarRocks
6110 | DialectType::DuckDB
6111 | DialectType::PostgreSQL
6112 | DialectType::Redshift
6113 )
6114 {
6115 Action::AlterTableRenameStripSchema
6116 } else {
6117 Action::None
6118 }
6119 } else {
6120 Action::None
6121 }
6122 }
6123 // EPOCH(x) expression -> target-specific epoch conversion
6124 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6125 Action::EpochConvert
6126 }
6127 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6128 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6129 Action::EpochMsConvert
6130 }
6131 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6132 Expression::StringAgg(_) => {
6133 if matches!(
6134 target,
6135 DialectType::MySQL
6136 | DialectType::SingleStore
6137 | DialectType::Doris
6138 | DialectType::StarRocks
6139 | DialectType::SQLite
6140 ) {
6141 Action::StringAggConvert
6142 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6143 Action::StringAggConvert
6144 } else {
6145 Action::None
6146 }
6147 }
6148 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6149 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6150 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6151 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6152 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6153 Action::ArrayLengthConvert
6154 }
6155 Expression::ArraySize(_) => {
6156 if matches!(target, DialectType::Drill) {
6157 Action::ArraySizeDrill
6158 } else {
6159 Action::ArrayLengthConvert
6160 }
6161 }
6162 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6163 Expression::ArrayRemove(_) => match target {
6164 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6165 Action::ArrayRemoveConvert
6166 }
6167 _ => Action::None,
6168 },
6169 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6170 Expression::ArrayReverse(_) => match target {
6171 DialectType::ClickHouse => Action::ArrayReverseConvert,
6172 _ => Action::None,
6173 },
6174 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6175 Expression::JsonKeys(_) => match target {
6176 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6177 Action::JsonKeysConvert
6178 }
6179 _ => Action::None,
6180 },
6181 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6182 Expression::ParseJson(_) => match target {
6183 DialectType::SQLite
6184 | DialectType::Doris
6185 | DialectType::MySQL
6186 | DialectType::StarRocks => Action::ParseJsonStrip,
6187 _ => Action::None,
6188 },
6189 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6190 Expression::WeekOfYear(_)
6191 if matches!(target, DialectType::Snowflake)
6192 && !matches!(source, DialectType::Snowflake) =>
6193 {
6194 Action::WeekOfYearToWeekIso
6195 }
6196 // NVL: clear original_name so generator uses dialect-specific function names
6197 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6198 // XOR: expand for dialects that don't support the XOR keyword
6199 Expression::Xor(_) => {
6200 let target_supports_xor = matches!(
6201 target,
6202 DialectType::MySQL
6203 | DialectType::SingleStore
6204 | DialectType::Doris
6205 | DialectType::StarRocks
6206 );
6207 if !target_supports_xor {
6208 Action::XorExpand
6209 } else {
6210 Action::None
6211 }
6212 }
6213 // TSQL #table -> temp table normalization (CREATE TABLE)
6214 Expression::CreateTable(ct)
6215 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6216 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6217 && ct.name.name.name.starts_with('#') =>
6218 {
6219 Action::TempTableHash
6220 }
6221 // TSQL #table -> strip # from table references in SELECT/etc.
6222 Expression::Table(tr)
6223 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6224 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6225 && tr.name.name.starts_with('#') =>
6226 {
6227 Action::TempTableHash
6228 }
6229 // TSQL #table -> strip # from DROP TABLE names
6230 Expression::DropTable(ref dt)
6231 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6232 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6233 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6234 {
6235 Action::TempTableHash
6236 }
6237 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6238 Expression::JsonExtract(_)
6239 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6240 {
6241 Action::JsonExtractToTsql
6242 }
6243 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6244 Expression::JsonExtractScalar(_)
6245 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6246 {
6247 Action::JsonExtractToTsql
6248 }
6249 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6250 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6251 Action::JsonExtractToClickHouse
6252 }
6253 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6254 Expression::JsonExtractScalar(_)
6255 if matches!(target, DialectType::ClickHouse) =>
6256 {
6257 Action::JsonExtractToClickHouse
6258 }
6259 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6260 Expression::JsonExtract(ref f)
6261 if !f.arrow_syntax
6262 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6263 {
6264 Action::JsonExtractToArrow
6265 }
6266 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6267 Expression::JsonExtract(ref f)
6268 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6269 && !matches!(
6270 source,
6271 DialectType::PostgreSQL
6272 | DialectType::Redshift
6273 | DialectType::Materialize
6274 )
6275 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6276 {
6277 Action::JsonExtractToGetJsonObject
6278 }
6279 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6280 Expression::JsonExtract(_)
6281 if matches!(
6282 target,
6283 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6284 ) =>
6285 {
6286 Action::JsonExtractToGetJsonObject
6287 }
6288 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6289 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6290 Expression::JsonExtractScalar(ref f)
6291 if !f.arrow_syntax
6292 && !f.hash_arrow_syntax
6293 && matches!(
6294 target,
6295 DialectType::PostgreSQL
6296 | DialectType::Redshift
6297 | DialectType::Snowflake
6298 | DialectType::SQLite
6299 | DialectType::DuckDB
6300 ) =>
6301 {
6302 Action::JsonExtractScalarConvert
6303 }
6304 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6305 Expression::JsonExtractScalar(_)
6306 if matches!(
6307 target,
6308 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6309 ) =>
6310 {
6311 Action::JsonExtractScalarToGetJsonObject
6312 }
6313 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6314 Expression::JsonExtract(ref f)
6315 if !f.arrow_syntax
6316 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6317 {
6318 Action::JsonPathNormalize
6319 }
6320 // JsonQuery (parsed JSON_QUERY) -> target-specific
6321 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6322 // JsonValue (parsed JSON_VALUE) -> target-specific
6323 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6324 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6325 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6326 Expression::AtTimeZone(_)
6327 if matches!(
6328 target,
6329 DialectType::Presto
6330 | DialectType::Trino
6331 | DialectType::Athena
6332 | DialectType::Spark
6333 | DialectType::Databricks
6334 | DialectType::BigQuery
6335 | DialectType::Snowflake
6336 ) =>
6337 {
6338 Action::AtTimeZoneConvert
6339 }
6340 // DAY_OF_WEEK -> dialect-specific
6341 Expression::DayOfWeek(_)
6342 if matches!(
6343 target,
6344 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6345 ) =>
6346 {
6347 Action::DayOfWeekConvert
6348 }
6349 // CURRENT_USER -> CURRENT_USER() for Snowflake
6350 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6351 Action::CurrentUserParens
6352 }
6353 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6354 Expression::ElementAt(_)
6355 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6356 {
6357 Action::ElementAtConvert
6358 }
6359 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6360 Expression::ArrayFunc(ref arr)
6361 if !arr.bracket_notation
6362 && matches!(
6363 target,
6364 DialectType::Spark
6365 | DialectType::Databricks
6366 | DialectType::Hive
6367 | DialectType::BigQuery
6368 | DialectType::DuckDB
6369 | DialectType::Snowflake
6370 | DialectType::Presto
6371 | DialectType::Trino
6372 | DialectType::Athena
6373 | DialectType::ClickHouse
6374 | DialectType::StarRocks
6375 ) =>
6376 {
6377 Action::ArraySyntaxConvert
6378 }
6379 // VARIANCE expression -> varSamp for ClickHouse
6380 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6381 Action::VarianceToClickHouse
6382 }
6383 // STDDEV expression -> stddevSamp for ClickHouse
6384 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6385 Action::StddevToClickHouse
6386 }
6387 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6388 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6389 Action::ApproxQuantileConvert
6390 }
6391 // MonthsBetween -> target-specific
6392 Expression::MonthsBetween(_)
6393 if !matches!(
6394 target,
6395 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6396 ) =>
6397 {
6398 Action::MonthsBetweenConvert
6399 }
6400 // AddMonths -> target-specific DATEADD/DATE_ADD
6401 Expression::AddMonths(_) => Action::AddMonthsConvert,
6402 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6403 Expression::MapFromArrays(_)
6404 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6405 {
6406 Action::MapFromArraysConvert
6407 }
6408 // CURRENT_USER -> CURRENT_USER() for Spark
6409 Expression::CurrentUser(_)
6410 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6411 {
6412 Action::CurrentUserSparkParens
6413 }
6414 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6415 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6416 if matches!(
6417 source,
6418 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6419 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6420 && matches!(
6421 target,
6422 DialectType::DuckDB
6423 | DialectType::Presto
6424 | DialectType::Trino
6425 | DialectType::Athena
6426 | DialectType::PostgreSQL
6427 | DialectType::Redshift
6428 ) =>
6429 {
6430 Action::SparkDateFuncCast
6431 }
6432 // $parameter -> @parameter for BigQuery
6433 Expression::Parameter(ref p)
6434 if matches!(target, DialectType::BigQuery)
6435 && matches!(source, DialectType::DuckDB)
6436 && (p.style == crate::expressions::ParameterStyle::Dollar
6437 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6438 {
6439 Action::DollarParamConvert
6440 }
6441 // EscapeString literal: normalize literal newlines to \n
6442 Expression::Literal(Literal::EscapeString(ref s))
6443 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6444 {
6445 Action::EscapeStringNormalize
6446 }
6447 // straight_join: keep lowercase for DuckDB, quote for MySQL
6448 Expression::Column(ref col)
6449 if col.name.name == "STRAIGHT_JOIN"
6450 && col.table.is_none()
6451 && matches!(source, DialectType::DuckDB)
6452 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6453 {
6454 Action::StraightJoinCase
6455 }
6456 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6457 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6458 Expression::Interval(ref iv)
6459 if matches!(
6460 target,
6461 DialectType::Snowflake
6462 | DialectType::PostgreSQL
6463 | DialectType::Redshift
6464 ) && iv.unit.is_some()
6465 && matches!(
6466 &iv.this,
6467 Some(Expression::Literal(Literal::String(_)))
6468 ) =>
6469 {
6470 Action::SnowflakeIntervalFormat
6471 }
6472 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6473 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6474 if let Some(ref sample) = ts.sample {
6475 if !sample.explicit_method {
6476 Action::TablesampleReservoir
6477 } else {
6478 Action::None
6479 }
6480 } else {
6481 Action::None
6482 }
6483 }
6484 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6485 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6486 Expression::TableSample(ref ts)
6487 if matches!(target, DialectType::Snowflake)
6488 && !matches!(source, DialectType::Snowflake)
6489 && ts.sample.is_some() =>
6490 {
6491 if let Some(ref sample) = ts.sample {
6492 if !sample.explicit_method {
6493 Action::TablesampleSnowflakeStrip
6494 } else {
6495 Action::None
6496 }
6497 } else {
6498 Action::None
6499 }
6500 }
6501 Expression::Table(ref t)
6502 if matches!(target, DialectType::Snowflake)
6503 && !matches!(source, DialectType::Snowflake)
6504 && t.table_sample.is_some() =>
6505 {
6506 if let Some(ref sample) = t.table_sample {
6507 if !sample.explicit_method {
6508 Action::TablesampleSnowflakeStrip
6509 } else {
6510 Action::None
6511 }
6512 } else {
6513 Action::None
6514 }
6515 }
6516 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6517 Expression::AlterTable(ref at)
6518 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6519 && !at.actions.is_empty()
6520 && matches!(
6521 at.actions.first(),
6522 Some(crate::expressions::AlterTableAction::RenameTable(_))
6523 ) =>
6524 {
6525 Action::AlterTableToSpRename
6526 }
6527 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6528 Expression::Subscript(ref sub)
6529 if matches!(
6530 target,
6531 DialectType::BigQuery
6532 | DialectType::Hive
6533 | DialectType::Spark
6534 | DialectType::Databricks
6535 ) && matches!(
6536 source,
6537 DialectType::DuckDB
6538 | DialectType::PostgreSQL
6539 | DialectType::Presto
6540 | DialectType::Trino
6541 | DialectType::Redshift
6542 | DialectType::ClickHouse
6543 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6544 {
6545 Action::ArrayIndexConvert
6546 }
6547 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6548 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6549 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6550 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6551 Expression::WindowFunction(ref wf) => {
6552 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6553 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6554 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6555 if matches!(target, DialectType::BigQuery)
6556 && !is_row_number
6557 && !wf.over.order_by.is_empty()
6558 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6559 {
6560 Action::BigQueryNullsOrdering
6561 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
6562 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
6563 } else {
6564 let source_nulls_last = matches!(source, DialectType::DuckDB);
6565 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6566 matches!(
6567 f.kind,
6568 crate::expressions::WindowFrameKind::Range
6569 | crate::expressions::WindowFrameKind::Groups
6570 )
6571 });
6572 if source_nulls_last
6573 && matches!(target, DialectType::MySQL)
6574 && !wf.over.order_by.is_empty()
6575 && wf.over.order_by.iter().any(|o| !o.desc)
6576 && !has_range_frame
6577 {
6578 Action::MysqlNullsLastRewrite
6579 } else {
6580 match &wf.this {
6581 Expression::FirstValue(ref vf)
6582 | Expression::LastValue(ref vf)
6583 if vf.ignore_nulls == Some(false) =>
6584 {
6585 // RESPECT NULLS
6586 match target {
6587 DialectType::SQLite => Action::RespectNullsConvert,
6588 _ => Action::None,
6589 }
6590 }
6591 _ => Action::None,
6592 }
6593 }
6594 }
6595 }
6596 // CREATE TABLE a LIKE b -> dialect-specific transformations
6597 Expression::CreateTable(ref ct)
6598 if ct.columns.is_empty()
6599 && ct.constraints.iter().any(|c| {
6600 matches!(c, crate::expressions::TableConstraint::Like { .. })
6601 })
6602 && matches!(
6603 target,
6604 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6605 ) =>
6606 {
6607 Action::CreateTableLikeToCtas
6608 }
6609 Expression::CreateTable(ref ct)
6610 if ct.columns.is_empty()
6611 && ct.constraints.iter().any(|c| {
6612 matches!(c, crate::expressions::TableConstraint::Like { .. })
6613 })
6614 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6615 {
6616 Action::CreateTableLikeToSelectInto
6617 }
6618 Expression::CreateTable(ref ct)
6619 if ct.columns.is_empty()
6620 && ct.constraints.iter().any(|c| {
6621 matches!(c, crate::expressions::TableConstraint::Like { .. })
6622 })
6623 && matches!(target, DialectType::ClickHouse) =>
6624 {
6625 Action::CreateTableLikeToAs
6626 }
6627 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6628 Expression::CreateTable(ref ct)
6629 if matches!(target, DialectType::DuckDB)
6630 && matches!(
6631 source,
6632 DialectType::DuckDB
6633 | DialectType::Spark
6634 | DialectType::Databricks
6635 | DialectType::Hive
6636 ) =>
6637 {
6638 let has_comment = ct.columns.iter().any(|c| {
6639 c.comment.is_some()
6640 || c.constraints.iter().any(|con| {
6641 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6642 })
6643 });
6644 let has_props = !ct.properties.is_empty();
6645 if has_comment || has_props {
6646 Action::CreateTableStripComment
6647 } else {
6648 Action::None
6649 }
6650 }
6651 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6652 Expression::Array(_)
6653 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6654 {
6655 Action::ArrayConcatBracketConvert
6656 }
6657 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6658 Expression::ArrayFunc(ref arr)
6659 if arr.bracket_notation
6660 && matches!(source, DialectType::BigQuery)
6661 && matches!(target, DialectType::Redshift) =>
6662 {
6663 Action::ArrayConcatBracketConvert
6664 }
6665 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6666 Expression::BitwiseOrAgg(ref f)
6667 | Expression::BitwiseAndAgg(ref f)
6668 | Expression::BitwiseXorAgg(ref f) => {
6669 if matches!(target, DialectType::DuckDB) {
6670 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
6671 if let Expression::Cast(ref c) = f.this {
6672 match &c.to {
6673 DataType::Float { .. }
6674 | DataType::Double { .. }
6675 | DataType::Decimal { .. } => Action::BitAggFloatCast,
6676 DataType::Custom { ref name }
6677 if name.eq_ignore_ascii_case("REAL") =>
6678 {
6679 Action::BitAggFloatCast
6680 }
6681 _ => Action::None,
6682 }
6683 } else {
6684 Action::None
6685 }
6686 } else if matches!(target, DialectType::Snowflake) {
6687 Action::BitAggSnowflakeRename
6688 } else {
6689 Action::None
6690 }
6691 }
6692 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
6693 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
6694 Action::FilterToIff
6695 }
6696 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6697 Expression::Avg(ref f)
6698 | Expression::Sum(ref f)
6699 | Expression::Min(ref f)
6700 | Expression::Max(ref f)
6701 | Expression::CountIf(ref f)
6702 | Expression::Stddev(ref f)
6703 | Expression::StddevPop(ref f)
6704 | Expression::StddevSamp(ref f)
6705 | Expression::Variance(ref f)
6706 | Expression::VarPop(ref f)
6707 | Expression::VarSamp(ref f)
6708 | Expression::Median(ref f)
6709 | Expression::Mode(ref f)
6710 | Expression::First(ref f)
6711 | Expression::Last(ref f)
6712 | Expression::ApproxDistinct(ref f)
6713 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6714 {
6715 Action::AggFilterToIff
6716 }
6717 Expression::Count(ref c)
6718 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6719 {
6720 Action::AggFilterToIff
6721 }
6722 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
6723 Expression::Count(ref c)
6724 if c.distinct
6725 && matches!(&c.this, Some(Expression::Tuple(_)))
6726 && matches!(
6727 target,
6728 DialectType::Presto
6729 | DialectType::Trino
6730 | DialectType::DuckDB
6731 | DialectType::PostgreSQL
6732 ) =>
6733 {
6734 Action::CountDistinctMultiArg
6735 }
6736 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
6737 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
6738 Action::JsonToGetPath
6739 }
6740 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
6741 Expression::Struct(_)
6742 if matches!(
6743 target,
6744 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6745 ) && matches!(source, DialectType::DuckDB) =>
6746 {
6747 Action::StructToRow
6748 }
6749 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
6750 Expression::MapFunc(ref m)
6751 if m.curly_brace_syntax
6752 && matches!(
6753 target,
6754 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6755 )
6756 && matches!(source, DialectType::DuckDB) =>
6757 {
6758 Action::StructToRow
6759 }
6760 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
6761 Expression::ApproxCountDistinct(_)
6762 if matches!(
6763 target,
6764 DialectType::Presto | DialectType::Trino | DialectType::Athena
6765 ) =>
6766 {
6767 Action::ApproxCountDistinctToApproxDistinct
6768 }
6769 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
6770 Expression::ArrayContains(_)
6771 if matches!(
6772 target,
6773 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
6774 ) =>
6775 {
6776 Action::ArrayContainsConvert
6777 }
6778 // StrPosition with position -> complex expansion for Presto/DuckDB
6779 // STRPOS doesn't support a position arg in these dialects
6780 Expression::StrPosition(ref sp)
6781 if sp.position.is_some()
6782 && matches!(
6783 target,
6784 DialectType::Presto
6785 | DialectType::Trino
6786 | DialectType::Athena
6787 | DialectType::DuckDB
6788 ) =>
6789 {
6790 Action::StrPositionExpand
6791 }
6792 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
6793 Expression::First(ref f)
6794 if f.ignore_nulls == Some(true)
6795 && matches!(target, DialectType::DuckDB) =>
6796 {
6797 Action::FirstToAnyValue
6798 }
6799 // BEGIN -> START TRANSACTION for Presto/Trino
6800 Expression::Command(ref cmd)
6801 if cmd.this.eq_ignore_ascii_case("BEGIN")
6802 && matches!(
6803 target,
6804 DialectType::Presto | DialectType::Trino | DialectType::Athena
6805 ) =>
6806 {
6807 // Handled inline below
6808 Action::None // We'll handle it directly
6809 }
6810 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
6811 // PostgreSQL # is parsed as BitwiseXor (which is correct).
6812 // a || b (Concat operator) -> CONCAT function for Presto/Trino
6813 Expression::Concat(ref _op)
6814 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6815 && matches!(target, DialectType::Presto | DialectType::Trino) =>
6816 {
6817 Action::PipeConcatToConcat
6818 }
6819 _ => Action::None,
6820 }
6821 };
6822
6823 match action {
6824 Action::None => {
6825 // Handle inline transforms that don't need a dedicated action
6826
6827 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
6828 if let Expression::Between(ref b) = e {
6829 if let Some(sym) = b.symmetric {
6830 let keeps_symmetric =
6831 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
6832 if !keeps_symmetric {
6833 if sym {
6834 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
6835 let b = if let Expression::Between(b) = e {
6836 *b
6837 } else {
6838 unreachable!()
6839 };
6840 let between1 = Expression::Between(Box::new(
6841 crate::expressions::Between {
6842 this: b.this.clone(),
6843 low: b.low.clone(),
6844 high: b.high.clone(),
6845 not: b.not,
6846 symmetric: None,
6847 },
6848 ));
6849 let between2 = Expression::Between(Box::new(
6850 crate::expressions::Between {
6851 this: b.this,
6852 low: b.high,
6853 high: b.low,
6854 not: b.not,
6855 symmetric: None,
6856 },
6857 ));
6858 return Ok(Expression::Paren(Box::new(
6859 crate::expressions::Paren {
6860 this: Expression::Or(Box::new(
6861 crate::expressions::BinaryOp::new(
6862 between1, between2,
6863 ),
6864 )),
6865 trailing_comments: vec![],
6866 },
6867 )));
6868 } else {
6869 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
6870 let b = if let Expression::Between(b) = e {
6871 *b
6872 } else {
6873 unreachable!()
6874 };
6875 return Ok(Expression::Between(Box::new(
6876 crate::expressions::Between {
6877 this: b.this,
6878 low: b.low,
6879 high: b.high,
6880 not: b.not,
6881 symmetric: None,
6882 },
6883 )));
6884 }
6885 }
6886 }
6887 }
6888
6889 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
6890 if let Expression::ILike(ref _like) = e {
6891 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
6892 let like = if let Expression::ILike(l) = e {
6893 *l
6894 } else {
6895 unreachable!()
6896 };
6897 let lower_left = Expression::Function(Box::new(Function::new(
6898 "LOWER".to_string(),
6899 vec![like.left],
6900 )));
6901 let lower_right = Expression::Function(Box::new(Function::new(
6902 "LOWER".to_string(),
6903 vec![like.right],
6904 )));
6905 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
6906 left: lower_left,
6907 right: lower_right,
6908 escape: like.escape,
6909 quantifier: like.quantifier,
6910 })));
6911 }
6912 }
6913
6914 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
6915 if let Expression::MethodCall(ref mc) = e {
6916 if matches!(source, DialectType::Oracle)
6917 && mc.method.name.eq_ignore_ascii_case("VALUE")
6918 && mc.args.is_empty()
6919 {
6920 let is_dbms_random = match &mc.this {
6921 Expression::Identifier(id) => {
6922 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
6923 }
6924 Expression::Column(col) => {
6925 col.table.is_none()
6926 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
6927 }
6928 _ => false,
6929 };
6930 if is_dbms_random {
6931 let func_name = match target {
6932 DialectType::PostgreSQL
6933 | DialectType::Redshift
6934 | DialectType::DuckDB
6935 | DialectType::SQLite => "RANDOM",
6936 DialectType::Oracle => "DBMS_RANDOM.VALUE",
6937 _ => "RAND",
6938 };
6939 return Ok(Expression::Function(Box::new(Function::new(
6940 func_name.to_string(),
6941 vec![],
6942 ))));
6943 }
6944 }
6945 }
6946 // TRIM without explicit position -> add BOTH for ClickHouse
6947 if let Expression::Trim(ref trim) = e {
6948 if matches!(target, DialectType::ClickHouse)
6949 && trim.sql_standard_syntax
6950 && trim.characters.is_some()
6951 && !trim.position_explicit
6952 {
6953 let mut new_trim = (**trim).clone();
6954 new_trim.position_explicit = true;
6955 return Ok(Expression::Trim(Box::new(new_trim)));
6956 }
6957 }
6958 // BEGIN -> START TRANSACTION for Presto/Trino
6959 if let Expression::Transaction(ref txn) = e {
6960 if matches!(
6961 target,
6962 DialectType::Presto | DialectType::Trino | DialectType::Athena
6963 ) {
6964 // Convert BEGIN to START TRANSACTION by setting mark to "START"
6965 let mut txn = txn.clone();
6966 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
6967 "START".to_string(),
6968 ))));
6969 return Ok(Expression::Transaction(Box::new(*txn)));
6970 }
6971 }
6972 // IS TRUE/FALSE -> simplified forms for Presto/Trino
6973 if matches!(
6974 target,
6975 DialectType::Presto | DialectType::Trino | DialectType::Athena
6976 ) {
6977 match &e {
6978 Expression::IsTrue(itf) if !itf.not => {
6979 // x IS TRUE -> x
6980 return Ok(itf.this.clone());
6981 }
6982 Expression::IsTrue(itf) if itf.not => {
6983 // x IS NOT TRUE -> NOT x
6984 return Ok(Expression::Not(Box::new(
6985 crate::expressions::UnaryOp {
6986 this: itf.this.clone(),
6987 },
6988 )));
6989 }
6990 Expression::IsFalse(itf) if !itf.not => {
6991 // x IS FALSE -> NOT x
6992 return Ok(Expression::Not(Box::new(
6993 crate::expressions::UnaryOp {
6994 this: itf.this.clone(),
6995 },
6996 )));
6997 }
6998 Expression::IsFalse(itf) if itf.not => {
6999 // x IS NOT FALSE -> NOT NOT x
7000 let not_x =
7001 Expression::Not(Box::new(crate::expressions::UnaryOp {
7002 this: itf.this.clone(),
7003 }));
7004 return Ok(Expression::Not(Box::new(
7005 crate::expressions::UnaryOp { this: not_x },
7006 )));
7007 }
7008 _ => {}
7009 }
7010 }
7011 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7012 if matches!(target, DialectType::Redshift) {
7013 if let Expression::IsFalse(ref itf) = e {
7014 if itf.not {
7015 return Ok(Expression::Not(Box::new(
7016 crate::expressions::UnaryOp {
7017 this: Expression::IsFalse(Box::new(
7018 crate::expressions::IsTrueFalse {
7019 this: itf.this.clone(),
7020 not: false,
7021 },
7022 )),
7023 },
7024 )));
7025 }
7026 }
7027 }
7028 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7029 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7030 if let Expression::Function(ref f) = e {
7031 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7032 && matches!(source, DialectType::Snowflake)
7033 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7034 {
7035 if f.args.len() == 3 {
7036 let mut args = f.args.clone();
7037 args.push(Expression::string("g"));
7038 return Ok(Expression::Function(Box::new(Function::new(
7039 "REGEXP_REPLACE".to_string(),
7040 args,
7041 ))));
7042 } else if f.args.len() == 4 {
7043 // 4th arg might be position, add 'g' as 5th
7044 let mut args = f.args.clone();
7045 args.push(Expression::string("g"));
7046 return Ok(Expression::Function(Box::new(Function::new(
7047 "REGEXP_REPLACE".to_string(),
7048 args,
7049 ))));
7050 }
7051 }
7052 }
7053 Ok(e)
7054 }
7055
7056 Action::GreatestLeastNull => {
7057 let f = if let Expression::Function(f) = e {
7058 *f
7059 } else {
7060 unreachable!("action only triggered for Function expressions")
7061 };
7062 let mut null_checks: Vec<Expression> = f
7063 .args
7064 .iter()
7065 .map(|a| {
7066 Expression::IsNull(Box::new(IsNull {
7067 this: a.clone(),
7068 not: false,
7069 postfix_form: false,
7070 }))
7071 })
7072 .collect();
7073 let condition = if null_checks.len() == 1 {
7074 null_checks.remove(0)
7075 } else {
7076 let first = null_checks.remove(0);
7077 null_checks.into_iter().fold(first, |acc, check| {
7078 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7079 })
7080 };
7081 Ok(Expression::Case(Box::new(Case {
7082 operand: None,
7083 whens: vec![(condition, Expression::Null(Null))],
7084 else_: Some(Expression::Function(Box::new(Function::new(
7085 f.name, f.args,
7086 )))),
7087 comments: Vec::new(),
7088 })))
7089 }
7090
7091 Action::ArrayGenerateRange => {
7092 let f = if let Expression::Function(f) = e {
7093 *f
7094 } else {
7095 unreachable!("action only triggered for Function expressions")
7096 };
7097 let start = f.args[0].clone();
7098 let end = f.args[1].clone();
7099 let step = f.args.get(2).cloned();
7100
7101 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7102 end.clone(),
7103 Expression::number(1),
7104 )));
7105
7106 match target {
7107 DialectType::PostgreSQL | DialectType::Redshift => {
7108 let mut args = vec![start, end_minus_1];
7109 if let Some(s) = step {
7110 args.push(s);
7111 }
7112 Ok(Expression::Function(Box::new(Function::new(
7113 "GENERATE_SERIES".to_string(),
7114 args,
7115 ))))
7116 }
7117 DialectType::Presto | DialectType::Trino => {
7118 let mut args = vec![start, end_minus_1];
7119 if let Some(s) = step {
7120 args.push(s);
7121 }
7122 Ok(Expression::Function(Box::new(Function::new(
7123 "SEQUENCE".to_string(),
7124 args,
7125 ))))
7126 }
7127 DialectType::BigQuery => {
7128 let mut args = vec![start, end_minus_1];
7129 if let Some(s) = step {
7130 args.push(s);
7131 }
7132 Ok(Expression::Function(Box::new(Function::new(
7133 "GENERATE_ARRAY".to_string(),
7134 args,
7135 ))))
7136 }
7137 DialectType::Snowflake => {
7138 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7139 Expression::Paren(Box::new(Paren {
7140 this: end_minus_1,
7141 trailing_comments: vec![],
7142 })),
7143 Expression::number(1),
7144 )));
7145 let mut args = vec![start, normalized_end];
7146 if let Some(s) = step {
7147 args.push(s);
7148 }
7149 Ok(Expression::Function(Box::new(Function::new(
7150 "ARRAY_GENERATE_RANGE".to_string(),
7151 args,
7152 ))))
7153 }
7154 _ => Ok(Expression::Function(Box::new(Function::new(
7155 f.name, f.args,
7156 )))),
7157 }
7158 }
7159
7160 Action::Div0TypedDivision => {
7161 let if_func = if let Expression::IfFunc(f) = e {
7162 *f
7163 } else {
7164 unreachable!("action only triggered for IfFunc expressions")
7165 };
7166 if let Some(Expression::Div(div)) = if_func.false_value {
7167 let cast_type = if matches!(target, DialectType::SQLite) {
7168 DataType::Float {
7169 precision: None,
7170 scale: None,
7171 real_spelling: true,
7172 }
7173 } else {
7174 DataType::Double {
7175 precision: None,
7176 scale: None,
7177 }
7178 };
7179 let casted_left = Expression::Cast(Box::new(Cast {
7180 this: div.left,
7181 to: cast_type,
7182 trailing_comments: vec![],
7183 double_colon_syntax: false,
7184 format: None,
7185 default: None,
7186 }));
7187 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7188 condition: if_func.condition,
7189 true_value: if_func.true_value,
7190 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
7191 casted_left,
7192 div.right,
7193 )))),
7194 original_name: if_func.original_name,
7195 })))
7196 } else {
7197 // Not actually a Div, reconstruct
7198 Ok(Expression::IfFunc(Box::new(if_func)))
7199 }
7200 }
7201
7202 Action::ArrayAggCollectList => {
7203 let agg = if let Expression::ArrayAgg(a) = e {
7204 *a
7205 } else {
7206 unreachable!("action only triggered for ArrayAgg expressions")
7207 };
7208 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7209 name: Some("COLLECT_LIST".to_string()),
7210 ..agg
7211 })))
7212 }
7213
7214 Action::ArrayAggWithinGroupFilter => {
7215 let wg = if let Expression::WithinGroup(w) = e {
7216 *w
7217 } else {
7218 unreachable!("action only triggered for WithinGroup expressions")
7219 };
7220 if let Expression::ArrayAgg(inner_agg) = wg.this {
7221 let col = inner_agg.this.clone();
7222 let filter = Expression::IsNull(Box::new(IsNull {
7223 this: col,
7224 not: true,
7225 postfix_form: false,
7226 }));
7227 // For DuckDB, add explicit NULLS FIRST for DESC ordering
7228 let order_by = if matches!(target, DialectType::DuckDB) {
7229 wg.order_by
7230 .into_iter()
7231 .map(|mut o| {
7232 if o.desc && o.nulls_first.is_none() {
7233 o.nulls_first = Some(true);
7234 }
7235 o
7236 })
7237 .collect()
7238 } else {
7239 wg.order_by
7240 };
7241 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7242 this: inner_agg.this,
7243 distinct: inner_agg.distinct,
7244 filter: Some(filter),
7245 order_by,
7246 name: inner_agg.name,
7247 ignore_nulls: inner_agg.ignore_nulls,
7248 having_max: inner_agg.having_max,
7249 limit: inner_agg.limit,
7250 })))
7251 } else {
7252 Ok(Expression::WithinGroup(Box::new(wg)))
7253 }
7254 }
7255
7256 Action::ArrayAggFilter => {
7257 let agg = if let Expression::ArrayAgg(a) = e {
7258 *a
7259 } else {
7260 unreachable!("action only triggered for ArrayAgg expressions")
7261 };
7262 let col = agg.this.clone();
7263 let filter = Expression::IsNull(Box::new(IsNull {
7264 this: col,
7265 not: true,
7266 postfix_form: false,
7267 }));
7268 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7269 filter: Some(filter),
7270 ..agg
7271 })))
7272 }
7273
7274 Action::ArrayAggNullFilter => {
7275 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7276 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7277 let agg = if let Expression::ArrayAgg(a) = e {
7278 *a
7279 } else {
7280 unreachable!("action only triggered for ArrayAgg expressions")
7281 };
7282 let col = agg.this.clone();
7283 let not_null = Expression::IsNull(Box::new(IsNull {
7284 this: col,
7285 not: true,
7286 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7287 }));
7288 let new_filter = if let Some(existing_filter) = agg.filter {
7289 // AND the NOT IS NULL with existing filter
7290 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7291 existing_filter,
7292 not_null,
7293 )))
7294 } else {
7295 not_null
7296 };
7297 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7298 filter: Some(new_filter),
7299 ..agg
7300 })))
7301 }
7302
            Action::BigQueryArraySelectAsStructToSnowflake => {
                // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                if let Expression::Function(mut f) = e {
                    // Only rewrite ARRAY(...) whose single argument is a
                    // SELECT with the "STRUCT" kind marker (SELECT AS STRUCT).
                    let is_match = f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                    if is_match {
                        let inner_select = match f.args.remove(0) {
                            Expression::Select(s) => *s,
                            _ => unreachable!(
                                "argument already verified to be a Select expression"
                            ),
                        };
                        // Build OBJECT_CONSTRUCT args from SELECT expressions:
                        // each projection contributes a ('name', value) pair.
                        let mut oc_args = Vec::new();
                        for expr in &inner_select.expressions {
                            match expr {
                                // Aliased projection: key from the alias name.
                                Expression::Alias(a) => {
                                    let key = Expression::Literal(Literal::String(
                                        a.alias.name.clone(),
                                    ));
                                    let value = a.this.clone();
                                    oc_args.push(key);
                                    oc_args.push(value);
                                }
                                // Bare column: key from the column name.
                                Expression::Column(c) => {
                                    let key = Expression::Literal(Literal::String(
                                        c.name.name.clone(),
                                    ));
                                    oc_args.push(key);
                                    oc_args.push(expr.clone());
                                }
                                // NOTE(review): other expression shapes push only the
                                // value, leaving an odd-length OBJECT_CONSTRUCT arg
                                // list (no key) — confirm this is intended.
                                _ => {
                                    oc_args.push(expr.clone());
                                }
                            }
                        }
                        let object_construct = Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        )));
                        let array_agg = Expression::Function(Box::new(Function::new(
                            "ARRAY_AGG".to_string(),
                            vec![object_construct],
                        )));
                        // Rebuild the SELECT with ARRAY_AGG(...) as its only
                        // projection, carrying over the relational clauses.
                        let mut new_select = crate::expressions::Select::new();
                        new_select.expressions = vec![array_agg];
                        new_select.from = inner_select.from.clone();
                        new_select.where_clause = inner_select.where_clause.clone();
                        new_select.group_by = inner_select.group_by.clone();
                        new_select.having = inner_select.having.clone();
                        new_select.joins = inner_select.joins.clone();
                        // Wrap in a bare (uncorrelated, unaliased) subquery so it
                        // renders as (SELECT ...).
                        Ok(Expression::Subquery(Box::new(
                            crate::expressions::Subquery {
                                this: Expression::Select(Box::new(new_select)),
                                alias: None,
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                            },
                        )))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
7378
            Action::BigQueryPercentileContToDuckDB => {
                // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
                if let Expression::AggregateFunction(mut af) = e {
                    // Rename in place; all other aggregate attributes are kept.
                    af.name = "QUANTILE_CONT".to_string();
                    af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
                    // Keep only first 2 args (DuckDB's QUANTILE_CONT takes value + fraction)
                    if af.args.len() > 2 {
                        af.args.truncate(2);
                    }
                    Ok(Expression::AggregateFunction(af))
                } else {
                    Ok(e)
                }
            }
7393
            Action::ArrayAggIgnoreNullsDuckDB => {
                // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
                // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
                //
                // Only fired for ArrayAgg nodes, hence the unreachable!().
                let mut agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                agg.ignore_nulls = None; // Strip IGNORE NULLS
                // Only the first ORDER BY key gets NULLS FIRST; later keys keep
                // their existing nulls ordering.
                if !agg.order_by.is_empty() {
                    agg.order_by[0].nulls_first = Some(true);
                }
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
7408
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                //
                // Only rewrites the multi-argument form, which the parser models
                // as a Tuple inside the Count node; single-argument COUNT is
                // passed through by the inner else branch.
                if let Expression::Count(c) = e {
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr =
                            Expression::Tuple(Box::new(crate::expressions::Tuple {
                                expressions: args,
                            }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                            comments: Vec::new(),
                        }));
                        // Reassemble the COUNT, forcing DISTINCT and preserving
                        // the remaining attributes of the original node.
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                        })))
                    } else {
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }
7453
            Action::CastTimestampToDatetime => {
                // Rewrite the cast's target type to the custom DATETIME type,
                // preserving every other field of the original Cast.
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    ..c
                })))
            }
7467
            Action::CastTimestampStripTz => {
                // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
                //
                // Both the precision and the timezone flag are dropped; all
                // other cast fields are preserved via struct update.
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    ..c
                })))
            }
7483
            Action::CastTimestamptzToFunc => {
                // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
                //
                // NOTE(review): only the cast operand survives; any FORMAT or
                // DEFAULT attached to the original cast is discarded — confirm
                // those cannot occur for this action.
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![c.this],
                ))))
            }
7496
7497 Action::ToDateToCast => {
7498 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7499 if let Expression::Function(f) = e {
7500 let arg = f.args.into_iter().next().unwrap();
7501 Ok(Expression::Cast(Box::new(Cast {
7502 this: arg,
7503 to: DataType::Date,
7504 double_colon_syntax: false,
7505 trailing_comments: vec![],
7506 format: None,
7507 default: None,
7508 })))
7509 } else {
7510 Ok(e)
7511 }
7512 }
            Action::DateTruncWrapCast => {
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr])
                //
                // When the truncated operand is a CAST, the result is re-wrapped
                // in a CAST back to the same type. TIME inputs get special
                // handling: they are anchored to the epoch date so DATE_TRUNC
                // operates on a timestamp, then cast back to TIME.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Remember the cast target type of the operand, if any.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // DATE '1970-01-01' + t anchors the TIME value to
                                // the epoch so truncation has a date component.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                // Cast the truncated timestamp back to TIME.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                // Non-TIME cast input: wrap the truncation in a
                                // CAST back to the operand's original cast type.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            }
                        } else {
                            // NOTE(review): a TimestampTrunc input is re-emitted
                            // as DateTrunc on every path here — confirm this
                            // normalization is intended.
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr); mirrors the
                        // node-based branch above.
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Anything else is left untouched.
                    other => Ok(other),
                }
            }
7623
7624 Action::RegexpReplaceSnowflakeToDuckDB => {
7625 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7626 if let Expression::Function(f) = e {
7627 let mut args = f.args;
7628 let subject = args.remove(0);
7629 let pattern = args.remove(0);
7630 let replacement = args.remove(0);
7631 Ok(Expression::Function(Box::new(Function::new(
7632 "REGEXP_REPLACE".to_string(),
7633 vec![
7634 subject,
7635 pattern,
7636 replacement,
7637 Expression::Literal(crate::expressions::Literal::String(
7638 "g".to_string(),
7639 )),
7640 ],
7641 ))))
7642 } else {
7643 Ok(e)
7644 }
7645 }
7646
            Action::SetToVariable => {
                // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
                //
                // Items that already carry an explicit kind are left alone; only
                // bare assignments are tagged with the VARIABLE kind.
                if let Expression::SetStatement(mut s) = e {
                    for item in &mut s.items {
                        if item.kind.is_none() {
                            // Check if name already has VARIABLE prefix (from DuckDB source parsing)
                            let already_variable = match &item.name {
                                Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
                                _ => false,
                            };
                            if already_variable {
                                // Extract the actual name and set kind
                                if let Expression::Identifier(ref mut id) = item.name {
                                    let actual_name = id.name["VARIABLE ".len()..].to_string();
                                    id.name = actual_name;
                                }
                            }
                            // Tag the item so the generator renders SET VARIABLE.
                            item.kind = Some("VARIABLE".to_string());
                        }
                    }
                    Ok(Expression::SetStatement(s))
                } else {
                    Ok(e)
                }
            }
7672
            Action::ConvertTimezoneToExpr => {
                // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
                // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
                //
                // 2-arg form: (target_tz, timestamp); 3-arg form adds a leading
                // source_tz. Any other arity is left as a plain function call.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: None,
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else if f.args.len() == 3 {
                        let mut args = f.args;
                        let source_tz = args.remove(0);
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: Some(Box::new(source_tz)),
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
7705
            Action::BigQueryCastType => {
                // Convert BigQuery types to standard SQL types
                //
                // BigQuery's named types arrive from the parser as
                // DataType::Custom; each recognized name is mapped to the
                // corresponding standard DataType variant. Unrecognized custom
                // types (and non-custom types) pass through unchanged.
                if let Expression::DataType(dt) = e {
                    match dt {
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
                            Ok(Expression::DataType(DataType::BigInt { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("FLOAT64") =>
                        {
                            Ok(Expression::DataType(DataType::Double {
                                precision: None,
                                scale: None,
                            }))
                        }
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
                            Ok(Expression::DataType(DataType::Boolean))
                        }
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
                            Ok(Expression::DataType(DataType::VarBinary { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("NUMERIC") =>
                        {
                            // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
                            // default precision (18, 3) being added to bare DECIMAL
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::DataType(DataType::Custom {
                                    name: "DECIMAL".to_string(),
                                }))
                            } else {
                                Ok(Expression::DataType(DataType::Decimal {
                                    precision: None,
                                    scale: None,
                                }))
                            }
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("STRING") =>
                        {
                            Ok(Expression::DataType(DataType::String { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("DATETIME") =>
                        {
                            Ok(Expression::DataType(DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }))
                        }
                        _ => Ok(Expression::DataType(dt)),
                    }
                } else {
                    Ok(e)
                }
            }
7762
7763 Action::BigQuerySafeDivide => {
7764 // Convert SafeDivide expression to IF/CASE form for most targets
7765 if let Expression::SafeDivide(sd) = e {
7766 let x = *sd.this;
7767 let y = *sd.expression;
7768 // Wrap x and y in parens if they're complex expressions
7769 let y_ref = match &y {
7770 Expression::Column(_)
7771 | Expression::Literal(_)
7772 | Expression::Identifier(_) => y.clone(),
7773 _ => Expression::Paren(Box::new(Paren {
7774 this: y.clone(),
7775 trailing_comments: vec![],
7776 })),
7777 };
7778 let x_ref = match &x {
7779 Expression::Column(_)
7780 | Expression::Literal(_)
7781 | Expression::Identifier(_) => x.clone(),
7782 _ => Expression::Paren(Box::new(Paren {
7783 this: x.clone(),
7784 trailing_comments: vec![],
7785 })),
7786 };
7787 let condition = Expression::Neq(Box::new(BinaryOp::new(
7788 y_ref.clone(),
7789 Expression::number(0),
7790 )));
7791 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
7792
7793 if matches!(target, DialectType::Presto | DialectType::Trino) {
7794 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
7795 let cast_x = Expression::Cast(Box::new(Cast {
7796 this: match &x {
7797 Expression::Column(_)
7798 | Expression::Literal(_)
7799 | Expression::Identifier(_) => x,
7800 _ => Expression::Paren(Box::new(Paren {
7801 this: x,
7802 trailing_comments: vec![],
7803 })),
7804 },
7805 to: DataType::Double {
7806 precision: None,
7807 scale: None,
7808 },
7809 trailing_comments: vec![],
7810 double_colon_syntax: false,
7811 format: None,
7812 default: None,
7813 }));
7814 let cast_div = Expression::Div(Box::new(BinaryOp::new(
7815 cast_x,
7816 match &y {
7817 Expression::Column(_)
7818 | Expression::Literal(_)
7819 | Expression::Identifier(_) => y,
7820 _ => Expression::Paren(Box::new(Paren {
7821 this: y,
7822 trailing_comments: vec![],
7823 })),
7824 },
7825 )));
7826 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7827 condition,
7828 true_value: cast_div,
7829 false_value: Some(Expression::Null(Null)),
7830 original_name: None,
7831 })))
7832 } else if matches!(target, DialectType::PostgreSQL) {
7833 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
7834 let cast_x = Expression::Cast(Box::new(Cast {
7835 this: match &x {
7836 Expression::Column(_)
7837 | Expression::Literal(_)
7838 | Expression::Identifier(_) => x,
7839 _ => Expression::Paren(Box::new(Paren {
7840 this: x,
7841 trailing_comments: vec![],
7842 })),
7843 },
7844 to: DataType::Custom {
7845 name: "DOUBLE PRECISION".to_string(),
7846 },
7847 trailing_comments: vec![],
7848 double_colon_syntax: false,
7849 format: None,
7850 default: None,
7851 }));
7852 let y_paren = match &y {
7853 Expression::Column(_)
7854 | Expression::Literal(_)
7855 | Expression::Identifier(_) => y,
7856 _ => Expression::Paren(Box::new(Paren {
7857 this: y,
7858 trailing_comments: vec![],
7859 })),
7860 };
7861 let cast_div =
7862 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
7863 Ok(Expression::Case(Box::new(Case {
7864 operand: None,
7865 whens: vec![(condition, cast_div)],
7866 else_: Some(Expression::Null(Null)),
7867 comments: Vec::new(),
7868 })))
7869 } else if matches!(target, DialectType::DuckDB) {
7870 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
7871 Ok(Expression::Case(Box::new(Case {
7872 operand: None,
7873 whens: vec![(condition, div_expr)],
7874 else_: Some(Expression::Null(Null)),
7875 comments: Vec::new(),
7876 })))
7877 } else if matches!(target, DialectType::Snowflake) {
7878 // Snowflake: IFF(y <> 0, x / y, NULL)
7879 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7880 condition,
7881 true_value: div_expr,
7882 false_value: Some(Expression::Null(Null)),
7883 original_name: Some("IFF".to_string()),
7884 })))
7885 } else {
7886 // All others: IF(y <> 0, x / y, NULL)
7887 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7888 condition,
7889 true_value: div_expr,
7890 false_value: Some(Expression::Null(Null)),
7891 original_name: None,
7892 })))
7893 }
7894 } else {
7895 Ok(e)
7896 }
7897 }
7898
            Action::BigQueryLastDayStripUnit => {
                // LAST_DAY(date [, unit]) handling: drop the explicit unit and
                // lower to a target-appropriate form.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // + INTERVAL '1 MONTH' moves to the first day of the
                            // following month.
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // - INTERVAL '1 DAY' steps back to the month's last day.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                // Non-cast operands are left unwrapped.
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
7979
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                //
                // Pull the operand, target type, FORMAT expression, and the
                // safe-cast flag out of either Cast or SafeCast; anything else
                // (or a cast without FORMAT) is returned untouched.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the SQL FORMAT model string into strftime-style.
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        // The optional timezone becomes a trailing argument.
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    _ => Ok(e),
                }
            }
8057
            Action::BigQueryFunctionNormalize => {
                // Delegates all BigQuery function-name normalization to the
                // dedicated associated helper.
                Self::normalize_bigquery_function(e, source, target)
            }

            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                Ok(e)
            }
8066
            Action::BigQueryToHexLower => {
                // Collapse LOWER(...) wrappers that appear around hex-encoding
                // calls when round-tripping BigQuery's TO_HEX.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Any other operand: rebuild the LOWER wrapper as-is.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8105
            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // No LOWER directly underneath: leave UPPER untouched.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
8136
            Action::BigQueryAnyValueHaving => {
                // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
                // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
                //
                // `having_max` carries both the HAVING expression and a flag
                // distinguishing MAX from MIN. ANY_VALUE without HAVING is
                // passed through unchanged.
                if let Expression::AnyValue(agg) = e {
                    if let Some((having_expr, is_max)) = agg.having_max {
                        let func_name = if is_max {
                            "ARG_MAX_NULL"
                        } else {
                            "ARG_MIN_NULL"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![agg.this, *having_expr],
                        ))))
                    } else {
                        Ok(Expression::AnyValue(agg))
                    }
                } else {
                    Ok(e)
                }
            }
8158
8159 Action::BigQueryApproxQuantiles => {
8160 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8161 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8162 if let Expression::AggregateFunction(agg) = e {
8163 if agg.args.len() >= 2 {
8164 let x_expr = agg.args[0].clone();
8165 let n_expr = &agg.args[1];
8166
8167 // Extract the numeric value from n_expr
8168 let n = match n_expr {
8169 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8170 s.parse::<usize>().unwrap_or(2)
8171 }
8172 _ => 2,
8173 };
8174
8175 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8176 let mut quantiles = Vec::new();
8177 for i in 0..=n {
8178 let q = i as f64 / n as f64;
8179 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8180 if q == 0.0 {
8181 quantiles.push(Expression::number(0));
8182 } else if q == 1.0 {
8183 quantiles.push(Expression::number(1));
8184 } else {
8185 quantiles.push(Expression::Literal(
8186 crate::expressions::Literal::Number(format!("{}", q)),
8187 ));
8188 }
8189 }
8190
8191 let array_expr =
8192 Expression::Array(Box::new(crate::expressions::Array {
8193 expressions: quantiles,
8194 }));
8195
8196 // Preserve DISTINCT modifier
8197 let mut new_func = Function::new(
8198 "APPROX_QUANTILE".to_string(),
8199 vec![x_expr, array_expr],
8200 );
8201 new_func.distinct = agg.distinct;
8202 Ok(Expression::Function(Box::new(new_func)))
8203 } else {
8204 Ok(Expression::AggregateFunction(agg))
8205 }
8206 } else {
8207 Ok(e)
8208 }
8209 }
8210
8211 Action::GenericFunctionNormalize => {
8212 // Helper closure to convert ARBITRARY to target-specific function
8213 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8214 let name = match target {
8215 DialectType::ClickHouse => "any",
8216 DialectType::TSQL | DialectType::SQLite => "MAX",
8217 DialectType::Hive => "FIRST",
8218 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8219 "ARBITRARY"
8220 }
8221 _ => "ANY_VALUE",
8222 };
8223 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8224 }
8225
8226 if let Expression::Function(f) = e {
8227 let name = f.name.to_uppercase();
8228 match name.as_str() {
8229 "ARBITRARY" if f.args.len() == 1 => {
8230 let arg = f.args.into_iter().next().unwrap();
8231 Ok(convert_arbitrary(arg, target))
8232 }
8233 "TO_NUMBER" if f.args.len() == 1 => {
8234 let arg = f.args.into_iter().next().unwrap();
8235 match target {
8236 DialectType::Oracle | DialectType::Snowflake => {
8237 Ok(Expression::Function(Box::new(Function::new(
8238 "TO_NUMBER".to_string(),
8239 vec![arg],
8240 ))))
8241 }
8242 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8243 this: arg,
8244 to: crate::expressions::DataType::Double {
8245 precision: None,
8246 scale: None,
8247 },
8248 double_colon_syntax: false,
8249 trailing_comments: Vec::new(),
8250 format: None,
8251 default: None,
8252 }))),
8253 }
8254 }
8255 "AGGREGATE" if f.args.len() >= 3 => match target {
8256 DialectType::DuckDB
8257 | DialectType::Hive
8258 | DialectType::Presto
8259 | DialectType::Trino => Ok(Expression::Function(Box::new(
8260 Function::new("REDUCE".to_string(), f.args),
8261 ))),
8262 _ => Ok(Expression::Function(f)),
8263 },
8264 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8265 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8266 if matches!(target, DialectType::DuckDB) {
8267 Ok(Expression::Function(f))
8268 } else {
8269 let mut args = f.args;
8270 let this = args.remove(0);
8271 let pattern = args.remove(0);
8272 let flags = if args.is_empty() {
8273 None
8274 } else {
8275 Some(args.remove(0))
8276 };
8277 Ok(Expression::RegexpLike(Box::new(
8278 crate::expressions::RegexpFunc {
8279 this,
8280 pattern,
8281 flags,
8282 },
8283 )))
8284 }
8285 }
8286 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8287 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8288 if matches!(target, DialectType::DuckDB) {
8289 Ok(Expression::Function(f))
8290 } else {
8291 let mut args = f.args;
8292 let this = args.remove(0);
8293 let pattern = args.remove(0);
8294 let flags = if args.is_empty() {
8295 None
8296 } else {
8297 Some(args.remove(0))
8298 };
8299 Ok(Expression::RegexpLike(Box::new(
8300 crate::expressions::RegexpFunc {
8301 this,
8302 pattern,
8303 flags,
8304 },
8305 )))
8306 }
8307 }
8308 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
8309 "STRUCT_EXTRACT" if f.args.len() == 2 => {
8310 let mut args = f.args;
8311 let this = args.remove(0);
8312 let field_expr = args.remove(0);
8313 // Extract string literal to get field name
8314 let field_name = match &field_expr {
8315 Expression::Literal(crate::expressions::Literal::String(s)) => {
8316 s.clone()
8317 }
8318 Expression::Identifier(id) => id.name.clone(),
8319 _ => {
8320 return Ok(Expression::Function(Box::new(Function::new(
8321 "STRUCT_EXTRACT".to_string(),
8322 vec![this, field_expr],
8323 ))))
8324 }
8325 };
8326 Ok(Expression::StructExtract(Box::new(
8327 crate::expressions::StructExtractFunc {
8328 this,
8329 field: crate::expressions::Identifier::new(field_name),
8330 },
8331 )))
8332 }
8333 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
8334 "LIST_FILTER" if f.args.len() == 2 => {
8335 let name = match target {
8336 DialectType::DuckDB => "LIST_FILTER",
8337 _ => "FILTER",
8338 };
8339 Ok(Expression::Function(Box::new(Function::new(
8340 name.to_string(),
8341 f.args,
8342 ))))
8343 }
8344 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
8345 "LIST_TRANSFORM" if f.args.len() == 2 => {
8346 let name = match target {
8347 DialectType::DuckDB => "LIST_TRANSFORM",
8348 _ => "TRANSFORM",
8349 };
8350 Ok(Expression::Function(Box::new(Function::new(
8351 name.to_string(),
8352 f.args,
8353 ))))
8354 }
8355 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
8356 "LIST_SORT" if f.args.len() >= 1 => {
8357 let name = match target {
8358 DialectType::DuckDB
8359 | DialectType::Presto
8360 | DialectType::Trino => "ARRAY_SORT",
8361 _ => "SORT_ARRAY",
8362 };
8363 Ok(Expression::Function(Box::new(Function::new(
8364 name.to_string(),
8365 f.args,
8366 ))))
8367 }
// LIST_REVERSE_SORT(x): descending array sort.
// - DuckDB: native ARRAY_REVERSE_SORT
// - Spark/Databricks/Hive: SORT_ARRAY(x, FALSE) (second arg = ascending flag)
// - Presto/Trino/Athena: ARRAY_SORT with an explicit descending comparator
"LIST_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
        ))),
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // FALSE is built as a bare identifier so it renders as the
            // keyword rather than a quoted string.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Guard guarantees at least one argument; any extras are dropped.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    crate::expressions::Identifier::new("a"),
                    crate::expressions::Identifier::new("b"),
                ],
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            // NOTE(review): the parallel ARRAY_REVERSE_SORT arm
                            // builds -1 as Neg(number(1)); here it is a raw "-1"
                            // literal. Presumably both render identically —
                            // confirm with the generator.
                            Expression::Literal(Literal::Number("-1".to_string())),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                })),
                colon: false,
                parameter_types: Vec::new(),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        // Other targets: keep the DuckDB name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
8431 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8432 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8433 let mut args = f.args;
8434 args.push(Expression::string(","));
8435 let name = match target {
8436 DialectType::DuckDB => "STR_SPLIT",
8437 DialectType::Presto | DialectType::Trino => "SPLIT",
8438 DialectType::Spark
8439 | DialectType::Databricks
8440 | DialectType::Hive => "SPLIT",
8441 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8442 DialectType::Redshift => "SPLIT_TO_ARRAY",
8443 _ => "SPLIT",
8444 };
8445 Ok(Expression::Function(Box::new(Function::new(
8446 name.to_string(),
8447 args,
8448 ))))
8449 }
8450 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8451 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8452 let name = match target {
8453 DialectType::DuckDB => "STR_SPLIT",
8454 DialectType::Presto | DialectType::Trino => "SPLIT",
8455 DialectType::Spark
8456 | DialectType::Databricks
8457 | DialectType::Hive => "SPLIT",
8458 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8459 DialectType::Redshift => "SPLIT_TO_ARRAY",
8460 _ => "SPLIT",
8461 };
8462 Ok(Expression::Function(Box::new(Function::new(
8463 name.to_string(),
8464 f.args,
8465 ))))
8466 }
// STRING_TO_ARRAY(x, sep) / STR_SPLIT(x, sep) -> target-specific split.
"STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::Doris | DialectType::StarRocks => {
            "SPLIT_BY_STRING"
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STRING_TO_ARRAY"
        }
        _ => "SPLIT",
    };
    // Spark/Hive SPLIT treats the separator as a regex while the source
    // functions take it literally: wrap the separator in \Q...\E quoting
    // so regex metacharacters match literally.
    if matches!(
        target,
        DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive
    ) {
        // NOTE(review): arguments beyond the first two (e.g. the
        // 3-arg STRING_TO_ARRAY null-string form) are dropped on this
        // path — confirm that is intended.
        let mut args = f.args;
        let x = args.remove(0);
        let sep = args.remove(0);
        // CONCAT('\Q', sep, '\E') is built at runtime so non-literal
        // separators are escaped as well.
        let escaped_sep =
            Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![
                    Expression::string("\\Q"),
                    sep,
                    Expression::string("\\E"),
                ],
            )));
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![x, escaped_sep],
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        ))))
    }
}
// STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> regex-based split.
// No escaping here: the pattern is already a regex on both sides.
"STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT_REGEX",
        DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        _ => "REGEXP_SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT(x, sep) from dialects whose SPLIT takes a *literal* separator
// (Presto/Trino/Athena/StarRocks/Doris) into Spark/Hive, whose SPLIT
// takes a *regex*: quote the separator with \Q...\E so it matches
// literally. All other source/target combinations fall through.
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::StarRocks
                | DialectType::Doris
        )
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // SPLIT(x, CONCAT('\Q', sep, '\E')); CONCAT handles non-literal
    // separators at runtime too.
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "SPLIT".to_string(),
        vec![x, escaped_sep],
    ))))
}
// SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard).
// A ClickHouse target keeps the original (camelCase) name.
"SUBSTRINGINDEX" => {
    let name = if matches!(target, DialectType::ClickHouse) {
        f.name.clone()
    } else {
        "SUBSTRING_INDEX".to_string()
    };
    Ok(Expression::Function(Box::new(Function::new(name, f.args))))
}
8570 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
8571 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
8572 // Get the array argument (first arg, drop dimension args)
8573 let mut args = f.args;
8574 let arr = if args.is_empty() {
8575 return Ok(Expression::Function(Box::new(Function::new(
8576 name.to_string(),
8577 args,
8578 ))));
8579 } else {
8580 args.remove(0)
8581 };
8582 let name =
8583 match target {
8584 DialectType::Spark
8585 | DialectType::Databricks
8586 | DialectType::Hive => "SIZE",
8587 DialectType::Presto | DialectType::Trino => "CARDINALITY",
8588 DialectType::BigQuery => "ARRAY_LENGTH",
8589 DialectType::DuckDB => {
8590 // DuckDB: use ARRAY_LENGTH with all args
8591 let mut all_args = vec![arr];
8592 all_args.extend(args);
8593 return Ok(Expression::Function(Box::new(
8594 Function::new("ARRAY_LENGTH".to_string(), all_args),
8595 )));
8596 }
8597 DialectType::PostgreSQL | DialectType::Redshift => {
8598 // Keep ARRAY_LENGTH with dimension arg
8599 let mut all_args = vec![arr];
8600 all_args.extend(args);
8601 return Ok(Expression::Function(Box::new(
8602 Function::new("ARRAY_LENGTH".to_string(), all_args),
8603 )));
8604 }
8605 DialectType::ClickHouse => "LENGTH",
8606 _ => "ARRAY_LENGTH",
8607 };
8608 Ok(Expression::Function(Box::new(Function::new(
8609 name.to_string(),
8610 vec![arr],
8611 ))))
8612 }
// UNICODE(x) -> target-specific code-point function.
"UNICODE" if f.args.len() == 1 => {
    match target {
        DialectType::SQLite | DialectType::DuckDB => {
            // Native UNICODE() support: pass through.
            Ok(Expression::Function(Box::new(Function::new(
                "UNICODE".to_string(),
                f.args,
            ))))
        }
        DialectType::Oracle => {
            // Oracle: ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new(
                "UNISTR".to_string(),
                f.args,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ASCII".to_string(),
                vec![inner],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: ORD(CONVERT(x USING utf32)) — presumably the utf32
            // conversion exists so ORD sees the full code point; confirm
            // against MySQL's ORD() semantics.
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(
                crate::expressions::ConvertToCharset {
                    this: Box::new(arg),
                    dest: Some(Box::new(Expression::Identifier(
                        crate::expressions::Identifier::new("utf32"),
                    ))),
                    source: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ORD".to_string(),
                vec![convert_expr],
            ))))
        }
        // Everything else falls back to ASCII(x).
        // NOTE(review): ASCII and UNICODE differ for non-ASCII input on
        // some engines — confirm this is an acceptable approximation.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ASCII".to_string(),
            f.args,
        )))),
    }
}
8656 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
8657 "XOR" if f.args.len() >= 2 => {
8658 match target {
8659 DialectType::ClickHouse => {
8660 // ClickHouse: keep as xor() function with lowercase name
8661 Ok(Expression::Function(Box::new(Function::new(
8662 "xor".to_string(),
8663 f.args,
8664 ))))
8665 }
8666 DialectType::Presto | DialectType::Trino => {
8667 if f.args.len() == 2 {
8668 Ok(Expression::Function(Box::new(Function::new(
8669 "BITWISE_XOR".to_string(),
8670 f.args,
8671 ))))
8672 } else {
8673 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
8674 let mut args = f.args;
8675 let first = args.remove(0);
8676 let second = args.remove(0);
8677 let mut result =
8678 Expression::Function(Box::new(Function::new(
8679 "BITWISE_XOR".to_string(),
8680 vec![first, second],
8681 )));
8682 for arg in args {
8683 result =
8684 Expression::Function(Box::new(Function::new(
8685 "BITWISE_XOR".to_string(),
8686 vec![result, arg],
8687 )));
8688 }
8689 Ok(result)
8690 }
8691 }
8692 DialectType::MySQL
8693 | DialectType::SingleStore
8694 | DialectType::Doris
8695 | DialectType::StarRocks => {
8696 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
8697 let args = f.args;
8698 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
8699 this: None,
8700 expression: None,
8701 expressions: args,
8702 })))
8703 }
8704 DialectType::PostgreSQL | DialectType::Redshift => {
8705 // PostgreSQL: a # b (hash operator for XOR)
8706 let mut args = f.args;
8707 let first = args.remove(0);
8708 let second = args.remove(0);
8709 let mut result = Expression::BitwiseXor(Box::new(
8710 BinaryOp::new(first, second),
8711 ));
8712 for arg in args {
8713 result = Expression::BitwiseXor(Box::new(
8714 BinaryOp::new(result, arg),
8715 ));
8716 }
8717 Ok(result)
8718 }
8719 DialectType::DuckDB => {
8720 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
8721 Ok(Expression::Function(Box::new(Function::new(
8722 "XOR".to_string(),
8723 f.args,
8724 ))))
8725 }
8726 DialectType::BigQuery => {
8727 // BigQuery: a ^ b (caret operator for XOR)
8728 let mut args = f.args;
8729 let first = args.remove(0);
8730 let second = args.remove(0);
8731 let mut result = Expression::BitwiseXor(Box::new(
8732 BinaryOp::new(first, second),
8733 ));
8734 for arg in args {
8735 result = Expression::BitwiseXor(Box::new(
8736 BinaryOp::new(result, arg),
8737 ));
8738 }
8739 Ok(result)
8740 }
8741 _ => Ok(Expression::Function(Box::new(Function::new(
8742 "XOR".to_string(),
8743 f.args,
8744 )))),
8745 }
8746 }
// ARRAY_REVERSE_SORT(x): descending array sort (same mapping shape as the
// LIST_REVERSE_SORT arm, minus the DuckDB rename).
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // SORT_ARRAY(x, FALSE): second argument is the ascending flag.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Guard guarantees at least one argument; extras are dropped.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![
                        Identifier::new("a"),
                        Identifier::new("b"),
                    ],
                    colon: false,
                    parameter_types: Vec::new(),
                    body: Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::number(1),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                // -1 built as negation of 1.
                                Expression::Neg(Box::new(
                                    crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                    },
                                )),
                            ),
                        ],
                        else_: Some(Expression::number(0)),
                        comments: Vec::new(),
                    })),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        // Other targets: keep the name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
// ENCODE(x) with an implicit charset: Spark/Hive require an explicit
// 'utf-8' second argument; the Presto family uses TO_UTF8(x); everyone
// else keeps the one-argument call.
"ENCODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "TO_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ENCODE".to_string(),
        f.args,
    )))),
},
// DECODE(x): mirror image of the ENCODE arm (FROM_UTF8 for the Presto
// family, explicit 'utf-8' for Spark/Hive).
"DECODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "DECODE".to_string(),
        f.args,
    )))),
},
// QUANTILE(x, p) -> target-specific percentile function.
// NOTE(review): APPROX_PERCENTILE (Presto) is an approximate variant and
// PERCENTILE_CONT (BigQuery) is an analytic function — confirm these
// substitutions are the intended fidelity trade-off.
"QUANTILE" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE",
        DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
        DialectType::BigQuery => "PERCENTILE_CONT",
        _ => "QUANTILE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// QUANTILE_CONT(x, q): DuckDB keeps the plain two-argument form;
// PostgreSQL/Redshift/Snowflake need the ordered-set aggregate syntax
// PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x).
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    // column is cloned: it appears both inside the
                    // percentile node and in the ORDER BY below.
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    // Ascending order, no NULLS clause, no WITH FILL.
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q): identical shape to the QUANTILE_CONT arm, but maps
// to the discrete percentile (PERCENTILE_DISC).
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
// approximate-percentile name.
// NOTE(review): PostgreSQL's PERCENTILE_CONT normally requires WITHIN
// GROUP (ORDER BY ...) syntax, but this arm emits a plain call — confirm
// the generator handles that, or that this mapping is intentional.
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "PERCENTILE_CONT"
        }
        // Unknown target: keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH(x) -> seconds-since-epoch conversion function.
"EPOCH" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
9006 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9007 "EPOCH_MS" if f.args.len() == 1 => {
9008 match target {
9009 DialectType::Spark | DialectType::Databricks => {
9010 Ok(Expression::Function(Box::new(Function::new(
9011 "TIMESTAMP_MILLIS".to_string(),
9012 f.args,
9013 ))))
9014 }
9015 DialectType::Hive => {
9016 // Hive: FROM_UNIXTIME(x / 1000)
9017 let arg = f.args.into_iter().next().unwrap();
9018 let div_expr = Expression::Div(Box::new(
9019 crate::expressions::BinaryOp::new(
9020 arg,
9021 Expression::number(1000),
9022 ),
9023 ));
9024 Ok(Expression::Function(Box::new(Function::new(
9025 "FROM_UNIXTIME".to_string(),
9026 vec![div_expr],
9027 ))))
9028 }
9029 DialectType::Presto | DialectType::Trino => {
9030 Ok(Expression::Function(Box::new(Function::new(
9031 "FROM_UNIXTIME".to_string(),
9032 vec![Expression::Div(Box::new(
9033 crate::expressions::BinaryOp::new(
9034 f.args.into_iter().next().unwrap(),
9035 Expression::number(1000),
9036 ),
9037 ))],
9038 ))))
9039 }
9040 _ => Ok(Expression::Function(Box::new(Function::new(
9041 "EPOCH_MS".to_string(),
9042 f.args,
9043 )))),
9044 }
9045 }
// HASHBYTES('algorithm', x) (TSQL) -> target-specific hash function.
"HASHBYTES" if f.args.len() == 2 => {
    // A TSQL target keeps HASHBYTES untouched.
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    // The algorithm must be a string literal we can inspect; otherwise
    // the call is passed through unchanged.
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            s.to_uppercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            // Spark/Databricks spell it SHA; Hive (and others) SHA1.
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        // SHA2_N maps to the two-argument SHA2(x, N) form.
        "SHA2_256" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: rebuild HASHBYTES with the uppercased name.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) / JSON_EXTRACT_PATH_TEXT(...)
// -> target-specific JSON extraction.
//
// Key arguments are folded into one JSONPath string ($.key1.key2; keys
// that are all digits become array indices, e.g. $[0]) for targets that
// take a path. PostgreSQL/Materialize/Redshift/ClickHouse keep the
// variadic key form instead.
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // The _TEXT variant extracts a scalar as text rather than JSON.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build the JSONPath from the remaining key arguments.
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(
                crate::expressions::Literal::String(s),
            ) => {
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    // NOTE(review): keys are inserted unquoted; a key
                    // containing '.', quotes, or spaces would yield an
                    // invalid path — confirm only plain keys reach here.
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a '?' placeholder.
                // NOTE(review): this discards the dynamic key — verify
                // this is intended rather than a passthrough fallback.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        // NOTE(review): this branch duplicates the Presto/Trino branch
        // above and could be merged into the same `|` pattern.
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep the native variadic key form.
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Arrow operators: -> keeps JSON, ->> extracts text.
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Redshift: both variants map to JSON_EXTRACT_PATH_TEXT,
            // keeping the variadic key form.
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path')):
            // JSON_QUERY handles objects/arrays, JSON_VALUE handles
            // scalars; coalescing covers both shapes.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys variadically, not as a path.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Default: Presto-style path-based extraction.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
// APPROX_DISTINCT(x[, accuracy]) -> APPROX_COUNT_DISTINCT for
// Spark/Databricks/Hive/BigQuery; otherwise keep the Presto name.
"APPROX_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive
        | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
        _ => "APPROX_DISTINCT",
    };
    let mut args = f.args;
    // Hive's APPROX_COUNT_DISTINCT takes no accuracy parameter: drop it.
    if name == "APPROX_COUNT_DISTINCT"
        && matches!(target, DialectType::Hive)
    {
        args.truncate(1);
    }
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// REGEXP_EXTRACT(x, pattern) — dialects disagree on the *default* capture
// group (Presto/Trino/DuckDB: 0 = whole match; Hive/Spark family: 1 =
// first group). When the source and target defaults differ, the source's
// default is appended as an explicit third argument so semantics are
// preserved.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Default group index implied by the source dialect.
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Default group index implied by the target dialect.
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake spells this REGEXP_SUBSTR.
            // NOTE(review): REGEXP_SUBSTR's extra positional args are
            // position/occurrence rather than a group index — confirm
            // the plain two-arg passthrough matches the source's
            // default-group semantics.
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Pin the source's implicit group explicitly on the target.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
// RLIKE(str, pattern) -> dialect-appropriate regex-match form.
"RLIKE" if f.args.len() == 2 => {
    let mut args = f.args;
    let str_expr = args.remove(0);
    let pattern = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: REGEXP_MATCHES(str, pattern)
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                vec![str_expr, pattern],
            ))))
        }
        _ => {
            // RegexpLike renders per-dialect as RLIKE / ~ / REGEXP_LIKE.
            Ok(Expression::RegexpLike(Box::new(
                crate::expressions::RegexpFunc {
                    this: str_expr,
                    pattern,
                    flags: None,
                },
            )))
        }
    }
}
9332 // EOMONTH(date[, month_offset]) -> target-specific
9333 "EOMONTH" if f.args.len() >= 1 => {
9334 let mut args = f.args;
9335 let date_arg = args.remove(0);
9336 let month_offset = if !args.is_empty() {
9337 Some(args.remove(0))
9338 } else {
9339 None
9340 };
9341
9342 // Helper: wrap date in CAST to DATE
9343 let cast_to_date = |e: Expression| -> Expression {
9344 Expression::Cast(Box::new(Cast {
9345 this: e,
9346 to: DataType::Date,
9347 trailing_comments: vec![],
9348 double_colon_syntax: false,
9349 format: None,
9350 default: None,
9351 }))
9352 };
9353
9354 match target {
9355 DialectType::TSQL | DialectType::Fabric => {
9356 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
9357 let date = cast_to_date(date_arg);
9358 let date = if let Some(offset) = month_offset {
9359 Expression::Function(Box::new(Function::new(
9360 "DATEADD".to_string(),
9361 vec![
9362 Expression::Identifier(Identifier::new(
9363 "MONTH",
9364 )),
9365 offset,
9366 date,
9367 ],
9368 )))
9369 } else {
9370 date
9371 };
9372 Ok(Expression::Function(Box::new(Function::new(
9373 "EOMONTH".to_string(),
9374 vec![date],
9375 ))))
9376 }
9377 DialectType::Presto
9378 | DialectType::Trino
9379 | DialectType::Athena => {
9380 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
9381 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
9382 let cast_ts = Expression::Cast(Box::new(Cast {
9383 this: date_arg,
9384 to: DataType::Timestamp {
9385 timezone: false,
9386 precision: None,
9387 },
9388 trailing_comments: vec![],
9389 double_colon_syntax: false,
9390 format: None,
9391 default: None,
9392 }));
9393 let date = cast_to_date(cast_ts);
9394 let date = if let Some(offset) = month_offset {
9395 Expression::Function(Box::new(Function::new(
9396 "DATE_ADD".to_string(),
9397 vec![Expression::string("MONTH"), offset, date],
9398 )))
9399 } else {
9400 date
9401 };
9402 Ok(Expression::Function(Box::new(Function::new(
9403 "LAST_DAY_OF_MONTH".to_string(),
9404 vec![date],
9405 ))))
9406 }
9407 DialectType::PostgreSQL => {
9408 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
9409 let date = cast_to_date(date_arg);
9410 let date = if let Some(offset) = month_offset {
9411 let interval_str = format!(
9412 "{} MONTH",
9413 Self::expr_to_string_static(&offset)
9414 );
9415 Expression::Add(Box::new(
9416 crate::expressions::BinaryOp::new(
9417 date,
9418 Expression::Interval(Box::new(
9419 crate::expressions::Interval {
9420 this: Some(Expression::string(
9421 &interval_str,
9422 )),
9423 unit: None,
9424 },
9425 )),
9426 ),
9427 ))
9428 } else {
9429 date
9430 };
9431 let truncated =
9432 Expression::Function(Box::new(Function::new(
9433 "DATE_TRUNC".to_string(),
9434 vec![Expression::string("MONTH"), date],
9435 )));
9436 let plus_month = Expression::Add(Box::new(
9437 crate::expressions::BinaryOp::new(
9438 truncated,
9439 Expression::Interval(Box::new(
9440 crate::expressions::Interval {
9441 this: Some(Expression::string("1 MONTH")),
9442 unit: None,
9443 },
9444 )),
9445 ),
9446 ));
9447 let minus_day = Expression::Sub(Box::new(
9448 crate::expressions::BinaryOp::new(
9449 plus_month,
9450 Expression::Interval(Box::new(
9451 crate::expressions::Interval {
9452 this: Some(Expression::string("1 DAY")),
9453 unit: None,
9454 },
9455 )),
9456 ),
9457 ));
9458 Ok(Expression::Cast(Box::new(Cast {
9459 this: minus_day,
9460 to: DataType::Date,
9461 trailing_comments: vec![],
9462 double_colon_syntax: false,
9463 format: None,
9464 default: None,
9465 })))
9466 }
DialectType::DuckDB => {
    // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
    let date = cast_to_date(date_arg);
    let date = if let Some(offset) = month_offset {
        // Wrap negative numbers in parentheses for DuckDB INTERVAL
        let interval_val =
            if matches!(&offset, Expression::Neg(_)) {
                Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: offset,
                        trailing_comments: Vec::new(),
                    },
                ))
            } else {
                offset
            };
        Expression::Add(Box::new(crate::expressions::BinaryOp::new(
            date,
            Expression::Interval(Box::new(crate::expressions::Interval {
                this: Some(interval_val),
                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                    unit: crate::expressions::IntervalUnit::Month,
                    use_plural: false,
                }),
            })),
        )))
    } else {
        date
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::Snowflake | DialectType::Redshift => {
    // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
    // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
    let date = if matches!(target, DialectType::Snowflake) {
        Expression::Function(Box::new(Function::new(
            "TO_DATE".to_string(),
            vec![date_arg],
        )))
    } else {
        cast_to_date(date_arg)
    };
    let date = if let Some(offset) = month_offset {
        Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                // DATEADD's unit is rendered as a bare identifier, not a string.
                Expression::Identifier(Identifier::new(
                    "MONTH",
                )),
                offset,
                date,
            ],
        )))
    } else {
        date
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::Spark | DialectType::Databricks => {
    // Spark: LAST_DAY(TO_DATE(date))
    // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
    let date = Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![date_arg],
    )));
    let date = if let Some(offset) = month_offset {
        Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, offset],
        )))
    } else {
        date
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::MySQL => {
    // MySQL: LAST_DAY(DATE(date)) - no offset
    // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
    let date = if let Some(offset) = month_offset {
        let iu = crate::expressions::IntervalUnit::Month;
        // Dedicated DateAdd node so the generator emits MySQL's
        // DATE_ADD(date, INTERVAL n MONTH) form.
        Expression::DateAdd(Box::new(
            crate::expressions::DateAddFunc {
                this: date_arg,
                interval: offset,
                unit: iu,
            },
        ))
    } else {
        Expression::Function(Box::new(Function::new(
            "DATE".to_string(),
            vec![date_arg],
        )))
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::BigQuery => {
    // BigQuery: LAST_DAY(CAST(date AS DATE))
    // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
    let date = cast_to_date(date_arg);
    let date = if let Some(offset) = month_offset {
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(offset),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: crate::expressions::IntervalUnit::Month,
                use_plural: false,
            }),
        }));
        Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, interval],
        )))
    } else {
        date
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::ClickHouse => {
    // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
    let date = Expression::Cast(Box::new(Cast {
        this: date_arg,
        to: DataType::Nullable {
            inner: Box::new(DataType::Date),
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
    }));
    let date = if let Some(offset) = month_offset {
        Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![
                Expression::Identifier(Identifier::new(
                    "MONTH",
                )),
                offset,
                date,
            ],
        )))
    } else {
        date
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
DialectType::Hive => {
    // Hive: LAST_DAY(date); month offsets go through ADD_MONTHS first.
    let date = if let Some(offset) = month_offset {
        Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date_arg, offset],
        )))
    } else {
        date_arg
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
_ => {
    // Default: LAST_DAY(date), offset via DATEADD(MONTH, offset, date)
    let date = if let Some(offset) = month_offset {
        let unit =
            Expression::Identifier(Identifier::new("MONTH"));
        Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, offset, date_arg],
        )))
    } else {
        date_arg
    };
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
}
}
// LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
// NOTE(review): BigQuery sources are excluded by the guard — presumably
// handled by another arm; confirm before changing.
"LAST_DAY" | "LAST_DAY_OF_MONTH"
    if !matches!(source, DialectType::BigQuery)
        && f.args.len() >= 1 =>
{
    let first_arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL spells it EOMONTH.
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![first_arg],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![first_arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LAST_DAY".to_string(),
            vec![first_arg],
        )))),
    }
}
// MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
"MAP"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    // Cloned so the fallback branches below can still move `f.args` whole.
    let keys_arg = f.args[0].clone();
    let vals_arg = f.args[1].clone();
9703 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
9704 fn extract_array_elements(
9705 expr: &Expression,
9706 ) -> Option<&Vec<Expression>> {
9707 match expr {
9708 Expression::Array(arr) => Some(&arr.expressions),
9709 Expression::ArrayFunc(arr) => Some(&arr.expressions),
9710 Expression::Function(f)
9711 if f.name.eq_ignore_ascii_case("ARRAY") =>
9712 {
9713 Some(&f.args)
9714 }
9715 _ => None,
9716 }
9717 }

match target {
    DialectType::Spark | DialectType::Databricks => {
        // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    DialectType::Hive => {
        // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
        if let (Some(keys), Some(vals)) = (
            extract_array_elements(&keys_arg),
            extract_array_elements(&vals_arg),
        ) {
            if keys.len() == vals.len() {
                // Interleave keys and values: k1, v1, k2, v2, ...
                let mut interleaved = Vec::new();
                for (k, v) in keys.iter().zip(vals.iter()) {
                    interleaved.push(k.clone());
                    interleaved.push(v.clone());
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    interleaved,
                ))))
            } else {
                // Length mismatch: keep the original two-array form.
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                ))))
            }
        } else {
            // Arguments are not literal arrays; pass through unchanged.
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                f.args,
            ))))
        }
    }
    DialectType::Snowflake => {
        // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
        if let (Some(keys), Some(vals)) = (
            extract_array_elements(&keys_arg),
            extract_array_elements(&vals_arg),
        ) {
            if keys.len() == vals.len() {
                let mut interleaved = Vec::new();
                for (k, v) in keys.iter().zip(vals.iter()) {
                    interleaved.push(k.clone());
                    interleaved.push(v.clone());
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "OBJECT_CONSTRUCT".to_string(),
                    interleaved,
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                ))))
            }
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                f.args,
            ))))
        }
    }
    _ => Ok(Expression::Function(f)),
}
}
// MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
"MAP"
    if f.args.is_empty()
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        )
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    // Presto's 2-arg MAP needs explicit (empty) key and value arrays.
    let empty_keys =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![],
        }));
    let empty_vals =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![],
        }));
    Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        vec![empty_keys, empty_vals],
    ))))
}
// MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
"MAP"
    if f.args.len() >= 2
        && f.args.len() % 2 == 0
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::ClickHouse
        ) =>
{
    let args = f.args;
    match target {
        DialectType::DuckDB => {
            // MAP([k1, k2], [v1, v2])
            // Even positions are keys, odd positions are values.
            let mut keys = Vec::new();
            let mut vals = Vec::new();
            for (i, arg) in args.into_iter().enumerate() {
                if i % 2 == 0 {
                    keys.push(arg);
                } else {
                    vals.push(arg);
                }
            }
            let keys_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: keys },
            ));
            let vals_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: vals },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                vec![keys_arr, vals_arr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
            let mut keys = Vec::new();
            let mut vals = Vec::new();
            for (i, arg) in args.into_iter().enumerate() {
                if i % 2 == 0 {
                    keys.push(arg);
                } else {
                    vals.push(arg);
                }
            }
            let keys_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: keys },
            ));
            let vals_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: vals },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                vec![keys_arr, vals_arr],
            ))))
        }
        DialectType::Snowflake => Ok(Expression::Function(Box::new(
            Function::new("OBJECT_CONSTRUCT".to_string(), args),
        ))),
        // ClickHouse's native constructor is lowercase `map`.
        DialectType::ClickHouse => Ok(Expression::Function(Box::new(
            Function::new("map".to_string(), args),
        ))),
        _ => Ok(Expression::Function(Box::new(Function::new(
            "MAP".to_string(),
            args,
        )))),
    }
}
// COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
"COLLECT_LIST" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "COLLECT_LIST",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake
        | DialectType::BigQuery => "ARRAY_AGG",
        DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
        _ => "ARRAY_AGG",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// COLLECT_SET(x) -> target-specific distinct array aggregation
"COLLECT_SET" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "COLLECT_SET",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "SET_AGG",
        DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
        _ => "ARRAY_AGG",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// ISNAN(x) / IS_NAN(x) - normalize the NaN predicate name per target
"ISNAN" | "IS_NAN" => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "ISNAN",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "IS_NAN",
        DialectType::BigQuery
        | DialectType::PostgreSQL
        | DialectType::Redshift => "IS_NAN",
        DialectType::ClickHouse => "IS_NAN",
        _ => "ISNAN",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT_PART(str, delim, index) -> target-specific
"SPLIT_PART" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as SPLIT_PART (Spark 3.4+)
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Snowflake
        | DialectType::Redshift
        | DialectType::Trino
        | DialectType::Presto => Ok(Expression::Function(Box::new(
            Function::new("SPLIT_PART".to_string(), f.args),
        ))),
        DialectType::Hive => {
            // Ideal form would be SPLIT(str, delim)[index].
            // Complex conversion, just keep as-is for now
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SPLIT_PART".to_string(),
            f.args,
        )))),
    }
}
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    // Scalar variant selects text/value-returning functions on some targets.
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow operator form)
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE returns scalars, JSON_QUERY returns objects/arrays.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
// BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
"JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
    if f.args.len() >= 2
        && matches!(source, DialectType::SingleStore) =>
{
    let is_bson = name == "BSON_EXTRACT_BSON";
    let mut args = f.args;
    let json_expr = args.remove(0);

    // Build JSONPath from remaining arguments
    // NOTE(review): non-string-literal keys are silently skipped, and keys
    // containing '.' or quotes are appended verbatim — confirm intended.
    let mut path = String::from("$");
    for arg in &args {
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = arg
        {
            // Check if it's a numeric string (array index)
            if s.parse::<i64>().is_ok() {
                path.push('[');
                path.push_str(s);
                path.push(']');
            } else {
                path.push('.');
                path.push_str(s);
            }
        }
    }

    let target_func = if is_bson {
        "JSONB_EXTRACT"
    } else {
        "JSON_EXTRACT"
    };
    Ok(Expression::Function(Box::new(Function::new(
        target_func.to_string(),
        vec![json_expr, Expression::string(&path)],
    ))))
}
// ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
"ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
    // Rebuild the Function struct field-by-field so flags like
    // DISTINCT and bracket syntax survive the rename.
    Ok(Expression::Function(Box::new(Function {
        name: "arraySum".to_string(),
        args: f.args,
        distinct: f.distinct,
        trailing_comments: f.trailing_comments,
        use_bracket_syntax: f.use_bracket_syntax,
        no_parens: f.no_parens,
        quoted: f.quoted,
    })))
}
// TSQL JSON_QUERY/JSON_VALUE -> target-specific
// Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
// and is handled by JsonQueryValueConvert action. This handles the case where
// TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
"JSON_QUERY" | "JSON_VALUE"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => Ok(Expression::Function(Box::new(
            Function::new("GET_JSON_OBJECT".to_string(), f.args),
        ))),
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family sources pass timestamps as strings in Hive's default
    // format, so those targets need an explicit parse step first.
    let is_hive_source = matches!(
        source,
        DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
    );
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime =
                Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![strptime],
            ))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            let cast_varchar =
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg.clone(),
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
            let date_parse =
                Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![
                        cast_varchar,
                        Expression::string("%Y-%m-%d %T"),
                    ],
                )));
            let try_expr = Expression::Function(Box::new(
                Function::new("TRY".to_string(), vec![date_parse]),
            ));
            let date_format =
                Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
            let parse_datetime =
                Expression::Function(Box::new(Function::new(
                    "PARSE_DATETIME".to_string(),
                    vec![
                        date_format,
                        Expression::string("yyyy-MM-dd HH:mm:ss"),
                    ],
                )));
            let coalesce =
                Expression::Function(Box::new(Function::new(
                    "COALESCE".to_string(),
                    vec![try_expr, parse_datetime],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![coalesce],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_TIMESTAMP".to_string(),
            vec![arg],
        )))),
    }
}
// TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "TO_UNIX_TIMESTAMP".to_string(),
        f.args,
    )))),
},
// CURDATE() -> CURRENT_DATE
"CURDATE" => {
    Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
}
// CURTIME() -> CURRENT_TIME
"CURTIME" => {
    Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
        precision: None,
    }))
}
// ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
"ARRAY_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Hive => {
            let mut args = f.args;
            args.truncate(1); // Drop lambda comparator
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
"SORT_ARRAY" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_SORT".to_string(),
        f.args,
    )))),
},
// SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
"SORT_ARRAY" if f.args.len() == 2 => {
    // A literal FALSE second argument means descending order.
    let is_desc =
        matches!(&f.args[1], Expression::Boolean(b) if !b.value);
    if is_desc {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_REVERSE_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            DialectType::Presto | DialectType::Trino => {
                // Build ARRAY_SORT(arr, (a, b) -> CASE WHEN a < b THEN 1
                // WHEN a > b THEN -1 ELSE 0 END) to express descending order.
                let arr_arg = f.args.into_iter().next().unwrap();
                let a =
                    Expression::Column(crate::expressions::Column {
                        name: crate::expressions::Identifier::new("a"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                    });
                let b =
                    Expression::Column(crate::expressions::Column {
                        name: crate::expressions::Identifier::new("b"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                    });
                let case_expr = Expression::Case(Box::new(
                    crate::expressions::Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Literal::Number(
                                    "1".to_string(),
                                )),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Literal::Number(
                                    "-1".to_string(),
                                )),
                            ),
                        ],
                        else_: Some(Expression::Literal(
                            Literal::Number("0".to_string()),
                        )),
                        comments: Vec::new(),
                    },
                ));
                let lambda = Expression::Lambda(Box::new(
                    crate::expressions::LambdaExpr {
                        parameters: vec![
                            crate::expressions::Identifier::new("a"),
                            crate::expressions::Identifier::new("b"),
                        ],
                        body: case_expr,
                        colon: false,
                        parameter_types: Vec::new(),
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_SORT".to_string(),
                    vec![arr_arg, lambda],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        }
    } else {
        // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
        match target {
            DialectType::Hive => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![f.args.into_iter().next().unwrap()],
            )))),
        }
    }
}
// LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
"LEFT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // LEFT(x, n) -> SUBSTRING(x, 1, n)
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, Expression::number(1), n],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "LEFT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
"RIGHT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // SUBSTRING(x, LENGTH(x) - (n - 1))
            let len_x = Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![x.clone()],
            )));
            let n_minus_1 = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    n,
                    Expression::number(1),
                ),
            ));
            // Explicit parens keep (n - 1) grouped in the rendered SQL.
            let n_minus_1_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: n_minus_1,
                    trailing_comments: Vec::new(),
                },
            ));
            let offset = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    len_x,
                    n_minus_1_paren,
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, offset],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "RIGHT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
10437 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
10438 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
10439 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10440 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
10441 ))),
10442 DialectType::Spark | DialectType::Databricks => {
10443 Ok(Expression::Function(Box::new(Function::new(
10444 "MAP_FROM_ARRAYS".to_string(),
10445 f.args,
10446 ))))
10447 }
10448 _ => Ok(Expression::Function(Box::new(Function::new(
10449 "MAP".to_string(),
10450 f.args,
10451 )))),
10452 },
10453 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
10454 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
10455 "LIKE" if f.args.len() >= 2 => {
10456 let (this, pattern) = if matches!(source, DialectType::SQLite) {
10457 // SQLite: LIKE(pattern, string) -> string LIKE pattern
10458 (f.args[1].clone(), f.args[0].clone())
10459 } else {
10460 // Standard: LIKE(string, pattern) -> string LIKE pattern
10461 (f.args[0].clone(), f.args[1].clone())
10462 };
10463 let escape = if f.args.len() >= 3 {
10464 Some(f.args[2].clone())
10465 } else {
10466 None
10467 };
10468 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10469 left: this,
10470 right: pattern,
10471 escape,
10472 quantifier: None,
10473 })))
10474 }
10475 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
10476 "ILIKE" if f.args.len() >= 2 => {
10477 let this = f.args[0].clone();
10478 let pattern = f.args[1].clone();
10479 let escape = if f.args.len() >= 3 {
10480 Some(f.args[2].clone())
10481 } else {
10482 None
10483 };
10484 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
10485 left: this,
10486 right: pattern,
10487 escape,
10488 quantifier: None,
10489 })))
10490 }
10491 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
10492 "CHAR" if f.args.len() == 1 => match target {
10493 DialectType::MySQL
10494 | DialectType::SingleStore
10495 | DialectType::TSQL => Ok(Expression::Function(f)),
10496 _ => Ok(Expression::Function(Box::new(Function::new(
10497 "CHR".to_string(),
10498 f.args,
10499 )))),
10500 },
10501 // CONCAT(a, b) -> a || b for PostgreSQL
10502 "CONCAT"
10503 if f.args.len() == 2
10504 && matches!(target, DialectType::PostgreSQL)
10505 && matches!(
10506 source,
10507 DialectType::ClickHouse | DialectType::MySQL
10508 ) =>
10509 {
10510 let mut args = f.args;
10511 let right = args.pop().unwrap();
10512 let left = args.pop().unwrap();
10513 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10514 this: Box::new(left),
10515 expression: Box::new(right),
10516 safe: None,
10517 })))
10518 }
10519 // ARRAY_TO_STRING(arr, delim) -> target-specific
10520 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
10521 DialectType::Presto | DialectType::Trino => {
10522 Ok(Expression::Function(Box::new(Function::new(
10523 "ARRAY_JOIN".to_string(),
10524 f.args,
10525 ))))
10526 }
10527 DialectType::TSQL => Ok(Expression::Function(Box::new(
10528 Function::new("STRING_AGG".to_string(), f.args),
10529 ))),
10530 _ => Ok(Expression::Function(f)),
10531 },
10532 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
10533 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
10534 DialectType::Spark
10535 | DialectType::Databricks
10536 | DialectType::Hive => Ok(Expression::Function(Box::new(
10537 Function::new("CONCAT".to_string(), f.args),
10538 ))),
10539 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10540 Function::new("ARRAY_CAT".to_string(), f.args),
10541 ))),
10542 DialectType::Redshift => Ok(Expression::Function(Box::new(
10543 Function::new("ARRAY_CONCAT".to_string(), f.args),
10544 ))),
10545 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10546 Function::new("ARRAY_CAT".to_string(), f.args),
10547 ))),
10548 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10549 Function::new("LIST_CONCAT".to_string(), f.args),
10550 ))),
10551 DialectType::Presto | DialectType::Trino => {
10552 Ok(Expression::Function(Box::new(Function::new(
10553 "CONCAT".to_string(),
10554 f.args,
10555 ))))
10556 }
10557 DialectType::BigQuery => Ok(Expression::Function(Box::new(
10558 Function::new("ARRAY_CONCAT".to_string(), f.args),
10559 ))),
10560 _ => Ok(Expression::Function(f)),
10561 },
10562 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
10563 "HAS" if f.args.len() == 2 => match target {
10564 DialectType::Spark
10565 | DialectType::Databricks
10566 | DialectType::Hive => Ok(Expression::Function(Box::new(
10567 Function::new("ARRAY_CONTAINS".to_string(), f.args),
10568 ))),
10569 DialectType::Presto | DialectType::Trino => {
10570 Ok(Expression::Function(Box::new(Function::new(
10571 "CONTAINS".to_string(),
10572 f.args,
10573 ))))
10574 }
10575 _ => Ok(Expression::Function(f)),
10576 },
            // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
            // Only fires for 3+ arguments; two-argument NVL is presumably
            // handled by another rule — TODO confirm.
            "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
                Function::new("COALESCE".to_string(), f.args),
            ))),
            // ISNULL(x) in MySQL -> (x IS NULL)
            // NOTE(review): the guard requires BOTH source and target to be
            // MySQL, so this rewrite only applies to MySQL round-trips.
            "ISNULL"
                if f.args.len() == 1
                    && matches!(source, DialectType::MySQL)
                    && matches!(target, DialectType::MySQL) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                // Parenthesize so IS NULL binds to the argument alone.
                Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::IsNull(Box::new(
                        crate::expressions::IsNull {
                            this: arg,
                            not: false,
                            postfix_form: false,
                        },
                    )),
                    trailing_comments: Vec::new(),
                })))
            }
            // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
            // NOTE(review): unlike ISNULL above, only the target is checked
            // here — confirm whether the source should also be restricted.
            "MONTHNAME"
                if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%M")],
                ))))
            }
            // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
            // Note the argument swap: ClickHouse takes (separator, string),
            // the targets below take (string, separator).
            "SPLITBYSTRING" if f.args.len() == 2 => {
                let sep = f.args[0].clone();
                let str_arg = f.args[1].clone();
                match target {
                    DialectType::DuckDB => Ok(Expression::Function(Box::new(
                        Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
                    ))),
                    DialectType::Doris => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_BY_STRING".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
                        // These engines' SPLIT takes a regex; wrapping the
                        // separator in \Q...\E makes it match literally.
                        let escaped =
                            Expression::Function(Box::new(Function::new(
                                "CONCAT".to_string(),
                                vec![
                                    Expression::string("\\Q"),
                                    sep,
                                    Expression::string("\\E"),
                                ],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT".to_string(),
                            vec![str_arg, escaped],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
            // Same (separator, string) -> (string, separator) swap as above,
            // but the separator is already a regex so no quoting is applied.
            "SPLITBYREGEXP" if f.args.len() == 2 => {
                let sep = f.args[0].clone();
                let str_arg = f.args[1].clone();
                match target {
                    DialectType::DuckDB => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "STR_SPLIT_REGEX".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
            // Doris places the unit second; all other targets place it first.
            "TOMONDAY" => {
                if f.args.len() == 1 {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Doris => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![arg, Expression::string("WEEK")],
                            ))))
                        }
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string("WEEK"), arg],
                        )))),
                    }
                } else {
                    // Unexpected arity: pass the call through unchanged.
                    Ok(Expression::Function(f))
                }
            }
            // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
            // Spark-family keeps COLLECT_LIST; everyone else gets ARRAY_AGG.
            // NOTE(review): despite the comment above, no FILTER clause is
            // built here — confirm whether a later pass adds it.
            "COLLECT_LIST" if f.args.len() == 1 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_AGG".to_string(),
                    f.args,
                )))),
            },
            // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
            "TO_CHAR"
                if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                // STRING is not a first-class DataType variant, so emit it as
                // a Custom type name.
                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                })))
            }
            // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
            "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
                DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                    Function::new("RANDOM".to_string(), vec![]),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ClickHouse formatDateTime -> target-specific
            // NOTE(review): the format string is forwarded verbatim; confirm
            // ClickHouse and MySQL %-specifiers are compatible for all inputs.
            "FORMATDATETIME" if f.args.len() >= 2 => match target {
                DialectType::MySQL => Ok(Expression::Function(Box::new(
                    Function::new("DATE_FORMAT".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
            "REPLICATE" if f.args.len() == 2 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "REPEAT".to_string(),
                    f.args,
                )))),
            },
            // LEN(x) -> LENGTH(x) for non-TSQL targets
            // No CAST needed when arg is already a string literal
            "LEN" if f.args.len() == 1 => {
                match target {
                    DialectType::TSQL => Ok(Expression::Function(f)),
                    DialectType::Spark | DialectType::Databricks => {
                        let arg = f.args.into_iter().next().unwrap();
                        // Don't wrap string literals with CAST - they're already strings
                        let is_string = matches!(
                            &arg,
                            Expression::Literal(
                                crate::expressions::Literal::String(_)
                            )
                        );
                        // Non-literals get CAST(x AS VARCHAR) first —
                        // presumably to coerce non-string input for Spark's
                        // LENGTH; TODO confirm.
                        let final_arg = if is_string {
                            arg
                        } else {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }))
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![final_arg],
                        ))))
                    }
                    // All other targets: plain rename, no cast.
                    _ => {
                        let arg = f.args.into_iter().next().unwrap();
                        Ok(Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![arg],
                        ))))
                    }
                }
            }
            // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
            "COUNT_BIG" if f.args.len() == 1 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "COUNT".to_string(),
                    f.args,
                )))),
            },
            // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
            "DATEFROMPARTS" if f.args.len() == 3 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_DATE".to_string(),
                    f.args,
                )))),
            },
            // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
            "REGEXP_LIKE" if f.args.len() >= 2 => {
                let str_expr = f.args[0].clone();
                let pattern = f.args[1].clone();
                // Optional third argument carries match flags.
                let flags = if f.args.len() >= 3 {
                    Some(f.args[2].clone())
                } else {
                    None
                };
                match target {
                    // DuckDB spells this REGEXP_MATCHES; flags stay positional.
                    DialectType::DuckDB => {
                        let mut new_args = vec![str_expr, pattern];
                        if let Some(fl) = flags {
                            new_args.push(fl);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_MATCHES".to_string(),
                            new_args,
                        ))))
                    }
                    // Other targets get the dedicated AST node so the
                    // generator can render the dialect's own syntax.
                    _ => Ok(Expression::RegexpLike(Box::new(
                        crate::expressions::RegexpFunc {
                            this: str_expr,
                            pattern,
                            flags,
                        },
                    ))),
                }
            }
            // ClickHouse arrayJoin -> UNNEST for PostgreSQL
            "ARRAYJOIN" if f.args.len() == 1 => match target {
                DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                    Function::new("UNNEST".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
            "DATETIMEFROMPARTS" if f.args.len() == 7 => {
                match target {
                    DialectType::TSQL => Ok(Expression::Function(f)),
                    DialectType::DuckDB => {
                        // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
                        // Fold the trailing milliseconds argument into a
                        // fractional seconds value.
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        let s = args.pop().unwrap();
                        // s + (ms / 1000.0)
                        let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                            ms,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    "1000.0".to_string(),
                                ),
                            ),
                        )));
                        let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                            s,
                            Expression::Paren(Box::new(Paren {
                                this: ms_frac,
                                trailing_comments: vec![],
                            })),
                        )));
                        args.push(s_with_ms);
                        Ok(Expression::Function(Box::new(Function::new(
                            "MAKE_TIMESTAMP".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
                        // The final argument is scaled by 1,000,000
                        // (milliseconds -> nanoseconds).
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        // ms * 1000000
                        let ns = Expression::Mul(Box::new(BinaryOp::new(
                            ms,
                            Expression::number(1000000),
                        )));
                        args.push(ns);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    }
                    _ => {
                        // Default: keep function name for other targets
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIMEFROMPARTS".to_string(),
                            f.args,
                        ))))
                    }
                }
            }
            // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
            // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
            "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
                // TSQL argument order is CONVERT(<type>, <value> [, <style>]).
                let is_try = name == "TRY_CONVERT";
                let type_expr = f.args[0].clone();
                let value_expr = f.args[1].clone();
                // Optional TSQL style code (e.g. 101 -> MM/dd/yyyy); kept
                // borrowed and only cloned if re-emitted below.
                let style = if f.args.len() >= 3 {
                    Some(&f.args[2])
                } else {
                    None
                };

                // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
                if matches!(target, DialectType::TSQL) {
                    let normalized_type = match &type_expr {
                        // Parsed DataType: canonicalize the INT spelling.
                        Expression::DataType(dt) => {
                            let new_dt = match dt {
                                DataType::Int { .. } => DataType::Custom {
                                    name: "INTEGER".to_string(),
                                },
                                _ => dt.clone(),
                            };
                            Expression::DataType(new_dt)
                        }
                        // Bare identifier used as a type name: uppercase and
                        // canonicalize INT -> INTEGER.
                        Expression::Identifier(id) => {
                            let upper = id.name.to_uppercase();
                            let normalized = match upper.as_str() {
                                "INT" => "INTEGER",
                                _ => &upper,
                            };
                            Expression::Identifier(
                                crate::expressions::Identifier::new(normalized),
                            )
                        }
                        // Type name the parser classified as a column
                        // reference: same normalization, re-emitted as a
                        // plain identifier.
                        Expression::Column(col) => {
                            let upper = col.name.name.to_uppercase();
                            let normalized = match upper.as_str() {
                                "INT" => "INTEGER",
                                _ => &upper,
                            };
                            Expression::Identifier(
                                crate::expressions::Identifier::new(normalized),
                            )
                        }
                        // Anything else is kept verbatim.
                        _ => type_expr.clone(),
                    };
                    let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
                    let mut new_args = vec![normalized_type, value_expr];
                    if let Some(s) = style {
                        new_args.push(s.clone());
                    }
                    return Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        new_args,
                    ))));
                }
10940
10941 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
10942 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
10943 match e {
10944 Expression::DataType(dt) => {
10945 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
10946 match dt {
10947 DataType::Custom { name }
10948 if name.starts_with("NVARCHAR(")
10949 || name.starts_with("NCHAR(") =>
10950 {
10951 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
10952 let inner = &name[name.find('(').unwrap() + 1
10953 ..name.len() - 1];
10954 if inner.eq_ignore_ascii_case("MAX") {
10955 Some(DataType::Text)
10956 } else if let Ok(len) = inner.parse::<u32>() {
10957 if name.starts_with("NCHAR") {
10958 Some(DataType::Char {
10959 length: Some(len),
10960 })
10961 } else {
10962 Some(DataType::VarChar {
10963 length: Some(len),
10964 parenthesized_length: false,
10965 })
10966 }
10967 } else {
10968 Some(dt.clone())
10969 }
10970 }
10971 DataType::Custom { name } if name == "NVARCHAR" => {
10972 Some(DataType::VarChar {
10973 length: None,
10974 parenthesized_length: false,
10975 })
10976 }
10977 DataType::Custom { name } if name == "NCHAR" => {
10978 Some(DataType::Char { length: None })
10979 }
10980 DataType::Custom { name }
10981 if name == "NVARCHAR(MAX)"
10982 || name == "VARCHAR(MAX)" =>
10983 {
10984 Some(DataType::Text)
10985 }
10986 _ => Some(dt.clone()),
10987 }
10988 }
10989 Expression::Identifier(id) => {
10990 let name = id.name.to_uppercase();
10991 match name.as_str() {
10992 "INT" | "INTEGER" => Some(DataType::Int {
10993 length: None,
10994 integer_spelling: false,
10995 }),
10996 "BIGINT" => Some(DataType::BigInt { length: None }),
10997 "SMALLINT" => {
10998 Some(DataType::SmallInt { length: None })
10999 }
11000 "TINYINT" => {
11001 Some(DataType::TinyInt { length: None })
11002 }
11003 "FLOAT" => Some(DataType::Float {
11004 precision: None,
11005 scale: None,
11006 real_spelling: false,
11007 }),
11008 "REAL" => Some(DataType::Float {
11009 precision: None,
11010 scale: None,
11011 real_spelling: true,
11012 }),
11013 "DATETIME" | "DATETIME2" => {
11014 Some(DataType::Timestamp {
11015 timezone: false,
11016 precision: None,
11017 })
11018 }
11019 "DATE" => Some(DataType::Date),
11020 "BIT" => Some(DataType::Boolean),
11021 "TEXT" => Some(DataType::Text),
11022 "NUMERIC" => Some(DataType::Decimal {
11023 precision: None,
11024 scale: None,
11025 }),
11026 "MONEY" => Some(DataType::Decimal {
11027 precision: Some(15),
11028 scale: Some(4),
11029 }),
11030 "SMALLMONEY" => Some(DataType::Decimal {
11031 precision: Some(6),
11032 scale: Some(4),
11033 }),
11034 "VARCHAR" => Some(DataType::VarChar {
11035 length: None,
11036 parenthesized_length: false,
11037 }),
11038 "NVARCHAR" => Some(DataType::VarChar {
11039 length: None,
11040 parenthesized_length: false,
11041 }),
11042 "CHAR" => Some(DataType::Char { length: None }),
11043 "NCHAR" => Some(DataType::Char { length: None }),
11044 _ => Some(DataType::Custom { name }),
11045 }
11046 }
11047 Expression::Column(col) => {
11048 let name = col.name.name.to_uppercase();
11049 match name.as_str() {
11050 "INT" | "INTEGER" => Some(DataType::Int {
11051 length: None,
11052 integer_spelling: false,
11053 }),
11054 "BIGINT" => Some(DataType::BigInt { length: None }),
11055 "FLOAT" => Some(DataType::Float {
11056 precision: None,
11057 scale: None,
11058 real_spelling: false,
11059 }),
11060 "DATETIME" | "DATETIME2" => {
11061 Some(DataType::Timestamp {
11062 timezone: false,
11063 precision: None,
11064 })
11065 }
11066 "DATE" => Some(DataType::Date),
11067 "NUMERIC" => Some(DataType::Decimal {
11068 precision: None,
11069 scale: None,
11070 }),
11071 "VARCHAR" => Some(DataType::VarChar {
11072 length: None,
11073 parenthesized_length: false,
11074 }),
11075 "NVARCHAR" => Some(DataType::VarChar {
11076 length: None,
11077 parenthesized_length: false,
11078 }),
11079 "CHAR" => Some(DataType::Char { length: None }),
11080 "NCHAR" => Some(DataType::Char { length: None }),
11081 _ => Some(DataType::Custom { name }),
11082 }
11083 }
11084 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11085 Expression::Function(f) => {
11086 let fname = f.name.to_uppercase();
11087 match fname.as_str() {
11088 "VARCHAR" | "NVARCHAR" => {
11089 let len = f.args.first().and_then(|a| {
11090 if let Expression::Literal(
11091 crate::expressions::Literal::Number(n),
11092 ) = a
11093 {
11094 n.parse::<u32>().ok()
11095 } else if let Expression::Identifier(id) = a
11096 {
11097 if id.name.eq_ignore_ascii_case("MAX") {
11098 None
11099 } else {
11100 None
11101 }
11102 } else {
11103 None
11104 }
11105 });
11106 // Check for VARCHAR(MAX) -> TEXT
11107 let is_max = f.args.first().map_or(false, |a| {
11108 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11109 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11110 });
11111 if is_max {
11112 Some(DataType::Text)
11113 } else {
11114 Some(DataType::VarChar {
11115 length: len,
11116 parenthesized_length: false,
11117 })
11118 }
11119 }
11120 "NCHAR" | "CHAR" => {
11121 let len = f.args.first().and_then(|a| {
11122 if let Expression::Literal(
11123 crate::expressions::Literal::Number(n),
11124 ) = a
11125 {
11126 n.parse::<u32>().ok()
11127 } else {
11128 None
11129 }
11130 });
11131 Some(DataType::Char { length: len })
11132 }
11133 "NUMERIC" | "DECIMAL" => {
11134 let precision = f.args.first().and_then(|a| {
11135 if let Expression::Literal(
11136 crate::expressions::Literal::Number(n),
11137 ) = a
11138 {
11139 n.parse::<u32>().ok()
11140 } else {
11141 None
11142 }
11143 });
11144 let scale = f.args.get(1).and_then(|a| {
11145 if let Expression::Literal(
11146 crate::expressions::Literal::Number(n),
11147 ) = a
11148 {
11149 n.parse::<u32>().ok()
11150 } else {
11151 None
11152 }
11153 });
11154 Some(DataType::Decimal { precision, scale })
11155 }
11156 _ => None,
11157 }
11158 }
11159 _ => None,
11160 }
11161 }
11162
11163 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11164 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11165 let is_tsql_source =
11166 matches!(source, DialectType::TSQL | DialectType::Fabric);
11167 if is_tsql_source {
11168 match &dt {
11169 DataType::VarChar { length: None, .. } => {
11170 dt = DataType::VarChar {
11171 length: Some(30),
11172 parenthesized_length: false,
11173 };
11174 }
11175 DataType::Char { length: None } => {
11176 dt = DataType::Char { length: Some(30) };
11177 }
11178 _ => {}
11179 }
11180 }
11181
11182 // Determine if this is a string type
11183 let is_string_type = matches!(
11184 dt,
11185 DataType::VarChar { .. }
11186 | DataType::Char { .. }
11187 | DataType::Text
11188 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11189 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11190 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11191 || name == "STRING");
11192
11193 // Determine if this is a date/time type
11194 let is_datetime_type = matches!(
11195 dt,
11196 DataType::Timestamp { .. } | DataType::Date
11197 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11198 || name == "DATETIME2" || name == "SMALLDATETIME");
11199
11200 // Check for date conversion with style
11201 if style.is_some() {
11202 let style_num = style.and_then(|s| {
11203 if let Expression::Literal(
11204 crate::expressions::Literal::Number(n),
11205 ) = s
11206 {
11207 n.parse::<u32>().ok()
11208 } else {
11209 None
11210 }
11211 });
11212
11213 // TSQL CONVERT date styles (Java format)
11214 let format_str = style_num.and_then(|n| match n {
11215 101 => Some("MM/dd/yyyy"),
11216 102 => Some("yyyy.MM.dd"),
11217 103 => Some("dd/MM/yyyy"),
11218 104 => Some("dd.MM.yyyy"),
11219 105 => Some("dd-MM-yyyy"),
11220 108 => Some("HH:mm:ss"),
11221 110 => Some("MM-dd-yyyy"),
11222 112 => Some("yyyyMMdd"),
11223 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11224 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11225 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11226 _ => None,
11227 });
11228
11229 // Non-string, non-datetime types with style: just CAST, ignore the style
11230 if !is_string_type && !is_datetime_type {
11231 let cast_expr = if is_try {
11232 Expression::TryCast(Box::new(
11233 crate::expressions::Cast {
11234 this: value_expr,
11235 to: dt,
11236 trailing_comments: Vec::new(),
11237 double_colon_syntax: false,
11238 format: None,
11239 default: None,
11240 },
11241 ))
11242 } else {
11243 Expression::Cast(Box::new(
11244 crate::expressions::Cast {
11245 this: value_expr,
11246 to: dt,
11247 trailing_comments: Vec::new(),
11248 double_colon_syntax: false,
11249 format: None,
11250 default: None,
11251 },
11252 ))
11253 };
11254 return Ok(cast_expr);
11255 }
11256
11257 if let Some(java_fmt) = format_str {
11258 let c_fmt = java_fmt
11259 .replace("yyyy", "%Y")
11260 .replace("MM", "%m")
11261 .replace("dd", "%d")
11262 .replace("HH", "%H")
11263 .replace("mm", "%M")
11264 .replace("ss", "%S")
11265 .replace("SSSSSS", "%f")
11266 .replace("SSS", "%f")
11267 .replace("'T'", "T");
11268
11269 // For datetime target types: style is the INPUT format for parsing strings -> dates
11270 if is_datetime_type {
11271 match target {
11272 DialectType::DuckDB => {
11273 return Ok(Expression::Function(Box::new(
11274 Function::new(
11275 "STRPTIME".to_string(),
11276 vec![
11277 value_expr,
11278 Expression::string(&c_fmt),
11279 ],
11280 ),
11281 )));
11282 }
11283 DialectType::Spark
11284 | DialectType::Databricks => {
11285 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11286 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11287 let func_name =
11288 if matches!(dt, DataType::Date) {
11289 "TO_DATE"
11290 } else {
11291 "TO_TIMESTAMP"
11292 };
11293 return Ok(Expression::Function(Box::new(
11294 Function::new(
11295 func_name.to_string(),
11296 vec![
11297 value_expr,
11298 Expression::string(java_fmt),
11299 ],
11300 ),
11301 )));
11302 }
11303 DialectType::Hive => {
11304 return Ok(Expression::Function(Box::new(
11305 Function::new(
11306 "TO_TIMESTAMP".to_string(),
11307 vec![
11308 value_expr,
11309 Expression::string(java_fmt),
11310 ],
11311 ),
11312 )));
11313 }
11314 _ => {
11315 return Ok(Expression::Cast(Box::new(
11316 crate::expressions::Cast {
11317 this: value_expr,
11318 to: dt,
11319 trailing_comments: Vec::new(),
11320 double_colon_syntax: false,
11321 format: None,
11322 default: None,
11323 },
11324 )));
11325 }
11326 }
11327 }
11328
11329 // For string target types: style is the OUTPUT format for dates -> strings
11330 match target {
11331 DialectType::DuckDB => Ok(Expression::Function(
11332 Box::new(Function::new(
11333 "STRPTIME".to_string(),
11334 vec![
11335 value_expr,
11336 Expression::string(&c_fmt),
11337 ],
11338 )),
11339 )),
11340 DialectType::Spark | DialectType::Databricks => {
11341 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11342 // Determine the target string type
11343 let string_dt = match &dt {
11344 DataType::VarChar {
11345 length: Some(l),
11346 ..
11347 } => DataType::VarChar {
11348 length: Some(*l),
11349 parenthesized_length: false,
11350 },
11351 DataType::Text => DataType::Custom {
11352 name: "STRING".to_string(),
11353 },
11354 _ => DataType::Custom {
11355 name: "STRING".to_string(),
11356 },
11357 };
11358 let date_format_expr = Expression::Function(
11359 Box::new(Function::new(
11360 "DATE_FORMAT".to_string(),
11361 vec![
11362 value_expr,
11363 Expression::string(java_fmt),
11364 ],
11365 )),
11366 );
11367 let cast_expr = if is_try {
11368 Expression::TryCast(Box::new(
11369 crate::expressions::Cast {
11370 this: date_format_expr,
11371 to: string_dt,
11372 trailing_comments: Vec::new(),
11373 double_colon_syntax: false,
11374 format: None,
11375 default: None,
11376 },
11377 ))
11378 } else {
11379 Expression::Cast(Box::new(
11380 crate::expressions::Cast {
11381 this: date_format_expr,
11382 to: string_dt,
11383 trailing_comments: Vec::new(),
11384 double_colon_syntax: false,
11385 format: None,
11386 default: None,
11387 },
11388 ))
11389 };
11390 Ok(cast_expr)
11391 }
11392 DialectType::MySQL | DialectType::SingleStore => {
11393 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11394 let mysql_fmt = java_fmt
11395 .replace("yyyy", "%Y")
11396 .replace("MM", "%m")
11397 .replace("dd", "%d")
11398 .replace("HH:mm:ss.SSSSSS", "%T")
11399 .replace("HH:mm:ss", "%T")
11400 .replace("HH", "%H")
11401 .replace("mm", "%i")
11402 .replace("ss", "%S");
11403 let date_format_expr = Expression::Function(
11404 Box::new(Function::new(
11405 "DATE_FORMAT".to_string(),
11406 vec![
11407 value_expr,
11408 Expression::string(&mysql_fmt),
11409 ],
11410 )),
11411 );
11412 // MySQL uses CHAR for string casts
11413 let mysql_dt = match &dt {
11414 DataType::VarChar { length, .. } => {
11415 DataType::Char { length: *length }
11416 }
11417 _ => dt,
11418 };
11419 Ok(Expression::Cast(Box::new(
11420 crate::expressions::Cast {
11421 this: date_format_expr,
11422 to: mysql_dt,
11423 trailing_comments: Vec::new(),
11424 double_colon_syntax: false,
11425 format: None,
11426 default: None,
11427 },
11428 )))
11429 }
11430 DialectType::Hive => {
11431 let func_name = "TO_TIMESTAMP";
11432 Ok(Expression::Function(Box::new(
11433 Function::new(
11434 func_name.to_string(),
11435 vec![
11436 value_expr,
11437 Expression::string(java_fmt),
11438 ],
11439 ),
11440 )))
11441 }
11442 _ => Ok(Expression::Cast(Box::new(
11443 crate::expressions::Cast {
11444 this: value_expr,
11445 to: dt,
11446 trailing_comments: Vec::new(),
11447 double_colon_syntax: false,
11448 format: None,
11449 default: None,
11450 },
11451 ))),
11452 }
11453 } else {
11454 // Unknown style, just CAST
11455 let cast_expr = if is_try {
11456 Expression::TryCast(Box::new(
11457 crate::expressions::Cast {
11458 this: value_expr,
11459 to: dt,
11460 trailing_comments: Vec::new(),
11461 double_colon_syntax: false,
11462 format: None,
11463 default: None,
11464 },
11465 ))
11466 } else {
11467 Expression::Cast(Box::new(
11468 crate::expressions::Cast {
11469 this: value_expr,
11470 to: dt,
11471 trailing_comments: Vec::new(),
11472 double_colon_syntax: false,
11473 format: None,
11474 default: None,
11475 },
11476 ))
11477 };
11478 Ok(cast_expr)
11479 }
11480 } else {
11481 // No style - simple CAST
11482 let final_dt = if matches!(
11483 target,
11484 DialectType::MySQL | DialectType::SingleStore
11485 ) {
11486 match &dt {
11487 DataType::Int { .. }
11488 | DataType::BigInt { .. }
11489 | DataType::SmallInt { .. }
11490 | DataType::TinyInt { .. } => DataType::Custom {
11491 name: "SIGNED".to_string(),
11492 },
11493 DataType::VarChar { length, .. } => {
11494 DataType::Char { length: *length }
11495 }
11496 _ => dt,
11497 }
11498 } else {
11499 dt
11500 };
11501 let cast_expr = if is_try {
11502 Expression::TryCast(Box::new(
11503 crate::expressions::Cast {
11504 this: value_expr,
11505 to: final_dt,
11506 trailing_comments: Vec::new(),
11507 double_colon_syntax: false,
11508 format: None,
11509 default: None,
11510 },
11511 ))
11512 } else {
11513 Expression::Cast(Box::new(crate::expressions::Cast {
11514 this: value_expr,
11515 to: final_dt,
11516 trailing_comments: Vec::new(),
11517 double_colon_syntax: false,
11518 format: None,
11519 default: None,
11520 }))
11521 };
11522 Ok(cast_expr)
11523 }
11524 } else {
11525 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11526 Ok(Expression::Function(f))
11527 }
11528 }
11529 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11530 "STRFTIME" if f.args.len() == 2 => {
11531 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11532 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11533 // SQLite: args[0] = format, args[1] = value
11534 (f.args[1].clone(), &f.args[0])
11535 } else {
11536 // DuckDB and others: args[0] = value, args[1] = format
11537 (f.args[0].clone(), &f.args[1])
11538 };
11539
11540 // Helper to convert C-style format to Java-style
11541 fn c_to_java_format(fmt: &str) -> String {
11542 fmt.replace("%Y", "yyyy")
11543 .replace("%m", "MM")
11544 .replace("%d", "dd")
11545 .replace("%H", "HH")
11546 .replace("%M", "mm")
11547 .replace("%S", "ss")
11548 .replace("%f", "SSSSSS")
11549 .replace("%y", "yy")
11550 .replace("%-m", "M")
11551 .replace("%-d", "d")
11552 .replace("%-H", "H")
11553 .replace("%-I", "h")
11554 .replace("%I", "hh")
11555 .replace("%p", "a")
11556 .replace("%j", "DDD")
11557 .replace("%a", "EEE")
11558 .replace("%b", "MMM")
11559 .replace("%F", "yyyy-MM-dd")
11560 .replace("%T", "HH:mm:ss")
11561 }
11562
11563 // Helper: recursively convert format strings within expressions (handles CONCAT)
11564 fn convert_fmt_expr(
11565 expr: &Expression,
11566 converter: &dyn Fn(&str) -> String,
11567 ) -> Expression {
11568 match expr {
11569 Expression::Literal(
11570 crate::expressions::Literal::String(s),
11571 ) => Expression::string(&converter(s)),
11572 Expression::Function(func)
11573 if func.name.eq_ignore_ascii_case("CONCAT") =>
11574 {
11575 let new_args: Vec<Expression> = func
11576 .args
11577 .iter()
11578 .map(|a| convert_fmt_expr(a, converter))
11579 .collect();
11580 Expression::Function(Box::new(Function::new(
11581 "CONCAT".to_string(),
11582 new_args,
11583 )))
11584 }
11585 other => other.clone(),
11586 }
11587 }
11588
11589 match target {
11590 DialectType::DuckDB => {
11591 if matches!(source, DialectType::SQLite) {
11592 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
11593 let cast_val = Expression::Cast(Box::new(Cast {
11594 this: val,
11595 to: crate::expressions::DataType::Timestamp {
11596 precision: None,
11597 timezone: false,
11598 },
11599 trailing_comments: Vec::new(),
11600 double_colon_syntax: false,
11601 format: None,
11602 default: None,
11603 }));
11604 Ok(Expression::Function(Box::new(Function::new(
11605 "STRFTIME".to_string(),
11606 vec![cast_val, fmt_expr.clone()],
11607 ))))
11608 } else {
11609 Ok(Expression::Function(f))
11610 }
11611 }
11612 DialectType::Spark
11613 | DialectType::Databricks
11614 | DialectType::Hive => {
11615 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
11616 let converted_fmt =
11617 convert_fmt_expr(fmt_expr, &c_to_java_format);
11618 Ok(Expression::Function(Box::new(Function::new(
11619 "DATE_FORMAT".to_string(),
11620 vec![val, converted_fmt],
11621 ))))
11622 }
11623 DialectType::TSQL | DialectType::Fabric => {
11624 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
11625 let converted_fmt =
11626 convert_fmt_expr(fmt_expr, &c_to_java_format);
11627 Ok(Expression::Function(Box::new(Function::new(
11628 "FORMAT".to_string(),
11629 vec![val, converted_fmt],
11630 ))))
11631 }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
                    // Only a string-literal format can be translated; any
                    // other expression is passed through unchanged.
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let presto_fmt = duckdb_to_presto_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, Expression::string(&presto_fmt)],
                        ))))
                    } else {
                        // Non-literal format: rename the function but keep
                        // the format expression as-is.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                DialectType::BigQuery => {
                    // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let bq_fmt = duckdb_to_bigquery_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![Expression::string(&bq_fmt), val],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![fmt_expr.clone(), val],
                        ))))
                    }
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        // strftime specifiers -> Postgres TO_CHAR templates.
                        // The chain is order-sensitive but safe here: the
                        // "%-x" variants survive the earlier "%x" passes
                        // because the dash prevents a substring match.
                        let pg_fmt = s
                            .replace("%Y", "YYYY")
                            .replace("%m", "MM")
                            .replace("%d", "DD")
                            .replace("%H", "HH24")
                            .replace("%M", "MI")
                            .replace("%S", "SS")
                            .replace("%y", "YY")
                            .replace("%-m", "FMMM")
                            .replace("%-d", "FMDD")
                            .replace("%-H", "FMHH24")
                            .replace("%-I", "FMHH12")
                            .replace("%p", "AM")
                            .replace("%F", "YYYY-MM-DD")
                            .replace("%T", "HH24:MI:SS");
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![val, Expression::string(&pg_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
11702 _ => Ok(Expression::Function(f)),
11703 }
11704 }
11705 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
11706 "STRPTIME" if f.args.len() == 2 => {
11707 let val = f.args[0].clone();
11708 let fmt_expr = &f.args[1];
11709
11710 fn c_to_java_format_parse(fmt: &str) -> String {
11711 fmt.replace("%Y", "yyyy")
11712 .replace("%m", "MM")
11713 .replace("%d", "dd")
11714 .replace("%H", "HH")
11715 .replace("%M", "mm")
11716 .replace("%S", "ss")
11717 .replace("%f", "SSSSSS")
11718 .replace("%y", "yy")
11719 .replace("%-m", "M")
11720 .replace("%-d", "d")
11721 .replace("%-H", "H")
11722 .replace("%-I", "h")
11723 .replace("%I", "hh")
11724 .replace("%p", "a")
11725 .replace("%F", "yyyy-MM-dd")
11726 .replace("%T", "HH:mm:ss")
11727 }
11728
            // Route DuckDB's STRPTIME(val, fmt) to the target's parse idiom.
            match target {
                DialectType::DuckDB => Ok(Expression::Function(f)),
                DialectType::Spark | DialectType::Databricks => {
                    // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let java_fmt = c_to_java_format_parse(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![val, Expression::string(&java_fmt)],
                        ))))
                    } else {
                        // Non-literal format: rename only, no translation.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                DialectType::Hive => {
                    // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let java_fmt = c_to_java_format_parse(s);
                        let unix_ts =
                            Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            )));
                        let from_unix =
                            Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![unix_ts],
                            )));
                        Ok(Expression::Cast(Box::new(
                            crate::expressions::Cast {
                                this: from_unix,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            },
                        )))
                    } else {
                        // Non-literal format: leave STRPTIME untouched here,
                        // unlike the other arms which still rename. The
                        // UNIX_TIMESTAMP emulation needs a literal format.
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let presto_fmt = duckdb_to_presto_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_PARSE".to_string(),
                            vec![val, Expression::string(&presto_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_PARSE".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                DialectType::BigQuery => {
                    // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let bq_fmt = duckdb_to_bigquery_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            vec![Expression::string(&bq_fmt), val],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            vec![fmt_expr.clone(), val],
                        ))))
                    }
                }
                // Any other target keeps STRPTIME verbatim.
                _ => Ok(Expression::Function(f)),
            }
11823 }
        // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
        "DATE_FORMAT"
            if f.args.len() >= 2
                && matches!(
                    source,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                ) =>
        {
            let val = f.args[0].clone();
            let fmt_expr = &f.args[1];

            match target {
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, Expression::string(&normalized)],
                        ))))
                    } else {
                        // Non-literal format: nothing to normalize.
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => {
                    // Convert Presto C-style to Java-style format
                    // (function name DATE_FORMAT is shared, only the
                    // format string changes).
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            vec![val, Expression::string(&java_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::DuckDB => {
                    // Convert to STRFTIME(val, duckdb_fmt)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![val, Expression::string(&duckdb_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                DialectType::BigQuery => {
                    // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![Expression::string(&bq_fmt), val],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![fmt_expr.clone(), val],
                        ))))
                    }
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
        "DATE_PARSE"
            if f.args.len() >= 2
                && matches!(
                    source,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                ) =>
        {
            let val = f.args[0].clone();
            let fmt_expr = &f.args[1];

            match target {
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // Presto -> Presto: normalize format
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_PARSE".to_string(),
                            vec![val, Expression::string(&normalized)],
                        ))))
                    } else {
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::Hive => {
                    // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
                            || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
                            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                this: val,
                                to: DataType::Timestamp { timezone: false, precision: None },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        } else {
                            // NOTE(review): emits two-arg TO_TIMESTAMP for a
                            // Hive target; stock Hive has no TO_TIMESTAMP(val,
                            // fmt) - presumably a later pass or the Hive
                            // generator rewrites this; confirm.
                            let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![val, Expression::string(&java_fmt)],
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![val, Expression::string(&java_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(f))
                    }
                }
                DialectType::DuckDB => {
                    // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![val, Expression::string(&duckdb_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
        // (note: the guards check only the target, so these renames apply
        // for any source dialect when targeting Hive)
        "FROM_BASE64"
            if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
        {
            // Hive spells base64 decode as UNBASE64.
            Ok(Expression::Function(Box::new(Function::new(
                "UNBASE64".to_string(),
                f.args,
            ))))
        }
        "TO_BASE64"
            if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
        {
            // Hive spells base64 encode as BASE64.
            Ok(Expression::Function(Box::new(Function::new(
                "BASE64".to_string(),
                f.args,
            ))))
        }
        // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
        "FROM_UNIXTIME"
            if f.args.len() == 1
                && matches!(
                    source,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                )
                && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks
                ) =>
        {
            // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
            // NOTE(review): presumably this restores timestamp typing because
            // Spark's FROM_UNIXTIME yields a string while Presto's yields a
            // timestamp - confirm against the Spark generator.
            let from_unix = Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                f.args,
            )));
            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                this: from_unix,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
        // (fires whenever the target is not itself in the Hive/MySQL family,
        // so a Presto-source call that slipped past the earlier arm also
        // lands here)
        "DATE_FORMAT"
            if f.args.len() >= 2
                && !matches!(
                    target,
                    DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::MySQL
                        | DialectType::SingleStore
                ) =>
        {
            let val = f.args[0].clone();
            let fmt_expr = &f.args[1];
            // Hive-family sources use Java SimpleDateFormat patterns; other
            // sources (e.g. MySQL) use C-style % patterns.
            let is_hive_source = matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            );
12074 fn java_to_c_format(fmt: &str) -> String {
12075 // Replace Java patterns with C strftime patterns.
12076 // Uses multi-pass to handle patterns that conflict.
12077 // First pass: replace multi-char patterns (longer first)
12078 let result = fmt
12079 .replace("yyyy", "%Y")
12080 .replace("SSSSSS", "%f")
12081 .replace("EEEE", "%W")
12082 .replace("MM", "%m")
12083 .replace("dd", "%d")
12084 .replace("HH", "%H")
12085 .replace("mm", "%M")
12086 .replace("ss", "%S")
12087 .replace("yy", "%y");
12088 // Second pass: handle single-char timezone patterns
12089 // z -> %Z (timezone name), Z -> %z (timezone offset)
12090 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
12091 let mut out = String::new();
12092 let chars: Vec<char> = result.chars().collect();
12093 let mut i = 0;
12094 while i < chars.len() {
12095 if chars[i] == '%' && i + 1 < chars.len() {
12096 // Already a format specifier, skip both chars
12097 out.push(chars[i]);
12098 out.push(chars[i + 1]);
12099 i += 2;
12100 } else if chars[i] == 'z' {
12101 out.push_str("%Z");
12102 i += 1;
12103 } else if chars[i] == 'Z' {
12104 out.push_str("%z");
12105 i += 1;
12106 } else {
12107 out.push(chars[i]);
12108 i += 1;
12109 }
12110 }
12111 out
12112 }
12113
12114 fn java_to_presto_format(fmt: &str) -> String {
12115 // Presto uses %T for HH:MM:SS
12116 let c_fmt = java_to_c_format(fmt);
12117 c_fmt.replace("%H:%M:%S", "%T")
12118 }
12119
12120 fn java_to_bq_format(fmt: &str) -> String {
12121 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12122 let c_fmt = java_to_c_format(fmt);
12123 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12124 }
12125
            // For Hive source, CAST string literals to appropriate type
            // so the target's format function receives a temporal value.
            let cast_val = if is_hive_source {
                match &val {
                    Expression::Literal(
                        crate::expressions::Literal::String(_),
                    ) => {
                        match target {
                            DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena => {
                                Self::ensure_cast_timestamp(val.clone())
                            }
                            DialectType::BigQuery => {
                                // BigQuery: CAST(val AS DATETIME)
                                Expression::Cast(Box::new(
                                    crate::expressions::Cast {
                                        this: val.clone(),
                                        to: DataType::Custom {
                                            name: "DATETIME".to_string(),
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    },
                                ))
                            }
                            _ => val.clone(),
                        }
                    }
                    // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
                    Expression::Cast(c)
                        if matches!(c.to, DataType::Date)
                            && matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                            ) =>
                    {
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: val.clone(),
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    }
                    Expression::Literal(crate::expressions::Literal::Date(
                        _,
                    )) if matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                    ) =>
                    {
                        // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
                        let cast_date = Self::date_literal_to_cast(val.clone());
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: cast_date,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    }
                    _ => val.clone(),
                }
            } else {
                val.clone()
            };

            match target {
                DialectType::DuckDB => {
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        // MySQL-style formats are already C-style; only the
                        // Java (Hive-family) formats need conversion.
                        let c_fmt = if is_hive_source {
                            java_to_c_format(s)
                        } else {
                            s.clone()
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_val, Expression::string(&c_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_val, fmt_expr.clone()],
                        ))))
                    }
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    if is_hive_source {
                        if let Expression::Literal(
                            crate::expressions::Literal::String(s),
                        ) = fmt_expr
                        {
                            let p_fmt = java_to_presto_format(s);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![cast_val, Expression::string(&p_fmt)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![cast_val, fmt_expr.clone()],
                            ))))
                        }
                    } else {
                        // Non-Hive source: keep the original args untouched.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_FORMAT".to_string(),
                            f.args,
                        ))))
                    }
                }
                DialectType::BigQuery => {
                    // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        // NOTE(review): for non-Hive sources the format is
                        // still run through java_to_c_format, which is near a
                        // no-op on C-style strings apart from z/Z - confirm
                        // this is intended for MySQL-style formats.
                        let bq_fmt = if is_hive_source {
                            java_to_bq_format(s)
                        } else {
                            java_to_c_format(s)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![Expression::string(&bq_fmt), cast_val],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT_DATE".to_string(),
                            vec![fmt_expr.clone(), cast_val],
                        ))))
                    }
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = fmt_expr
                    {
                        // Java pattern -> Postgres TO_CHAR template; "yyyy"
                        // must be replaced before "yy". (The "MM" -> "MM"
                        // entry is a visible no-op: the tokens coincide.)
                        let pg_fmt = s
                            .replace("yyyy", "YYYY")
                            .replace("MM", "MM")
                            .replace("dd", "DD")
                            .replace("HH", "HH24")
                            .replace("mm", "MI")
                            .replace("ss", "SS")
                            .replace("yy", "YY");
                        // NOTE(review): passes `val` (uncast) rather than
                        // `cast_val`, unlike the other targets - confirm
                        // TO_CHAR is meant to receive the raw value.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![val, Expression::string(&pg_fmt)],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![val, fmt_expr.clone()],
                        ))))
                    }
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // DATEDIFF(unit, start, end) - 3-arg form
        // SQLite uses DATEDIFF(date1, date2, unit_string) instead
        "DATEDIFF" if f.args.len() == 3 => {
            let mut args = f.args;
            // SQLite source: args = (date1, date2, unit_string)
            // Standard source: args = (unit, start, end)
            // Tuple is (unit_expr, start, end, unit_name); the raw unit
            // expression is unused below (hence the underscore binding).
            let (_arg0, arg1, arg2, unit_str) =
                if matches!(source, DialectType::SQLite) {
                    let date1 = args.remove(0);
                    let date2 = args.remove(0);
                    let unit_expr = args.remove(0);
                    let unit_s = Self::get_unit_str_static(&unit_expr);

                    // For SQLite target, generate JULIANDAY arithmetic directly
                    if matches!(target, DialectType::SQLite) {
                        let jd_first = Expression::Function(Box::new(
                            Function::new("JULIANDAY".to_string(), vec![date1]),
                        ));
                        let jd_second = Expression::Function(Box::new(
                            Function::new("JULIANDAY".to_string(), vec![date2]),
                        ));
                        // (JULIANDAY(date1) - JULIANDAY(date2)) is a fractional
                        // day delta; parenthesized so the unit scaling below
                        // binds correctly when rendered.
                        let diff = Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(
                                jd_first, jd_second,
                            ),
                        ));
                        let paren_diff = Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: diff,
                                trailing_comments: Vec::new(),
                            },
                        ));
                        // Scale the day delta into the requested unit.
                        // MONTH and YEAR use fixed 30.0 / 365.0 day
                        // approximations, not calendar arithmetic.
                        let adjusted = match unit_s.as_str() {
                            "HOUR" => Expression::Mul(Box::new(
                                crate::expressions::BinaryOp::new(
                                    paren_diff,
                                    Expression::Literal(Literal::Number(
                                        "24.0".to_string(),
                                    )),
                                ),
                            )),
                            "MINUTE" => Expression::Mul(Box::new(
                                crate::expressions::BinaryOp::new(
                                    paren_diff,
                                    Expression::Literal(Literal::Number(
                                        "1440.0".to_string(),
                                    )),
                                ),
                            )),
                            "SECOND" => Expression::Mul(Box::new(
                                crate::expressions::BinaryOp::new(
                                    paren_diff,
                                    Expression::Literal(Literal::Number(
                                        "86400.0".to_string(),
                                    )),
                                ),
                            )),
                            "MONTH" => Expression::Div(Box::new(
                                crate::expressions::BinaryOp::new(
                                    paren_diff,
                                    Expression::Literal(Literal::Number(
                                        "30.0".to_string(),
                                    )),
                                ),
                            )),
                            "YEAR" => Expression::Div(Box::new(
                                crate::expressions::BinaryOp::new(
                                    paren_diff,
                                    Expression::Literal(Literal::Number(
                                        "365.0".to_string(),
                                    )),
                                ),
                            )),
                            _ => paren_diff,
                        };
                        // Truncate the scaled delta to an integer (INTEGER
                        // spelling) and short-circuit out of the arm.
                        return Ok(Expression::Cast(Box::new(Cast {
                            this: adjusted,
                            to: DataType::Int {
                                length: None,
                                integer_spelling: true,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })));
                    }

                    // For other targets, remap to standard (unit, start, end) form
                    let unit_ident =
                        Expression::Identifier(Identifier::new(&unit_s));
                    (unit_ident, date1, date2, unit_s)
                } else {
                    let arg0 = args.remove(0);
                    let arg1 = args.remove(0);
                    let arg2 = args.remove(0);
                    let unit_s = Self::get_unit_str_static(&arg0);
                    (arg0, arg1, arg2, unit_s)
                };

            // For Hive/Spark source, string literal dates need to be cast
            // Note: Databricks is excluded - it handles string args like standard SQL
            let is_hive_spark =
                matches!(source, DialectType::Hive | DialectType::Spark);

            match target {
                DialectType::Snowflake => {
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    // Use ensure_to_date_preserved to add TO_DATE with a marker
                    // that prevents the Snowflake TO_DATE handler from converting it to CAST
                    let d1 = if is_hive_spark {
                        Self::ensure_to_date_preserved(arg1)
                    } else {
                        arg1
                    };
                    let d2 = if is_hive_spark {
                        Self::ensure_to_date_preserved(arg2)
                    } else {
                        arg2
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, d1, d2],
                    ))))
                }
                DialectType::Redshift => {
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    let d1 = if is_hive_spark {
                        Self::ensure_cast_date(arg1)
                    } else {
                        arg1
                    };
                    let d2 = if is_hive_spark {
                        Self::ensure_cast_date(arg2)
                    } else {
                        arg2
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, d1, d2],
                    ))))
                }
                DialectType::TSQL => {
                    // TSQL keeps the standard (unit, start, end) shape.
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, arg1, arg2],
                    ))))
                }
                DialectType::DuckDB => {
                    let is_redshift_tsql = matches!(
                        source,
                        DialectType::Redshift | DialectType::TSQL
                    );
                    if is_hive_spark {
                        // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                        let d1 = Self::ensure_cast_date(arg1);
                        let d2 = Self::ensure_cast_date(arg2);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string(&unit_str), d1, d2],
                        ))))
                    } else if matches!(source, DialectType::Snowflake) {
                        // For Snowflake source: special handling per unit
                        match unit_str.as_str() {
                            "NANOSECOND" => {
                                // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                                fn cast_to_timestamp_ns(
                                    expr: Expression,
                                ) -> Expression
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: expr,
                                        to: DataType::Custom {
                                            name: "TIMESTAMP_NS".to_string(),
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                let epoch_end = Expression::Function(Box::new(
                                    Function::new(
                                        "EPOCH_NS".to_string(),
                                        vec![cast_to_timestamp_ns(arg2)],
                                    ),
                                ));
                                let epoch_start = Expression::Function(
                                    Box::new(Function::new(
                                        "EPOCH_NS".to_string(),
                                        vec![cast_to_timestamp_ns(arg1)],
                                    )),
                                );
                                Ok(Expression::Sub(Box::new(BinaryOp::new(
                                    epoch_end,
                                    epoch_start,
                                ))))
                            }
                            "WEEK" => {
                                // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                                // Truncating both ends to week boundaries
                                // before differencing.
                                let d1 = Self::force_cast_date(arg1);
                                let d2 = Self::force_cast_date(arg2);
                                let dt1 = Expression::Function(Box::new(
                                    Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string("WEEK"), d1],
                                    ),
                                ));
                                let dt2 = Expression::Function(Box::new(
                                    Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string("WEEK"), d2],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(
                                    Function::new(
                                        "DATE_DIFF".to_string(),
                                        vec![
                                            Expression::string(&unit_str),
                                            dt1,
                                            dt2,
                                        ],
                                    ),
                                )))
                            }
                            _ => {
                                // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                                let d1 = Self::force_cast_date(arg1);
                                let d2 = Self::force_cast_date(arg2);
                                Ok(Expression::Function(Box::new(
                                    Function::new(
                                        "DATE_DIFF".to_string(),
                                        vec![
                                            Expression::string(&unit_str),
                                            d1,
                                            d2,
                                        ],
                                    ),
                                )))
                            }
                        }
                    } else if is_redshift_tsql {
                        // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                        let d1 = Self::force_cast_timestamp(arg1);
                        let d2 = Self::force_cast_timestamp(arg2);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string(&unit_str), d1, d2],
                        ))))
                    } else {
                        // Keep as DATEDIFF so DuckDB's transform_datediff handles
                        // DATE_TRUNC for WEEK, CAST for string literals, etc.
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
                DialectType::BigQuery => {
                    // Despite the name, this flag also covers Snowflake here.
                    let is_redshift_tsql = matches!(
                        source,
                        DialectType::Redshift
                            | DialectType::TSQL
                            | DialectType::Snowflake
                    );
                    let cast_d1 = if is_hive_spark {
                        Self::ensure_cast_date(arg1)
                    } else if is_redshift_tsql {
                        Self::force_cast_datetime(arg1)
                    } else {
                        Self::ensure_cast_datetime(arg1)
                    };
                    let cast_d2 = if is_hive_spark {
                        Self::ensure_cast_date(arg2)
                    } else if is_redshift_tsql {
                        Self::force_cast_datetime(arg2)
                    } else {
                        Self::ensure_cast_datetime(arg2)
                    };
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    // BigQuery's argument order is DATE_DIFF(end, start, unit).
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(),
                        vec![cast_d2, cast_d1, unit],
                    ))))
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
                    // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
                    // (this flag also covers Snowflake, as in the BigQuery arm)
                    let is_redshift_tsql = matches!(
                        source,
                        DialectType::Redshift
                            | DialectType::TSQL
                            | DialectType::Snowflake
                    );
                    let d1 = if is_hive_spark {
                        Self::double_cast_timestamp_date(arg1)
                    } else if is_redshift_tsql {
                        Self::force_cast_timestamp(arg1)
                    } else {
                        arg1
                    };
                    let d2 = if is_hive_spark {
                        Self::double_cast_timestamp_date(arg2)
                    } else if is_redshift_tsql {
                        Self::force_cast_timestamp(arg2)
                    } else {
                        arg2
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(),
                        vec![Expression::string(&unit_str), d1, d2],
                    ))))
                }
                // Hive's DATEDIFF takes no unit; emulate each unit explicitly.
                DialectType::Hive => match unit_str.as_str() {
                    "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Function(Box::new(Function::new(
                            "MONTHS_BETWEEN".to_string(),
                            vec![arg2, arg1],
                        ))),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))),
                    "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                        // CAST(DATEDIFF(end, start) / 7 AS INT)
                        this: Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(
                                Expression::Function(Box::new(Function::new(
                                    "DATEDIFF".to_string(),
                                    vec![arg2, arg1],
                                ))),
                                Expression::number(7),
                            ),
                        )),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))),
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![arg2, arg1],
                    )))),
                },
                DialectType::Spark | DialectType::Databricks => {
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, arg1, arg2],
                    ))))
                }
                _ => {
                    // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
                    let d1 = if is_hive_spark {
                        Self::ensure_cast_date(arg1)
                    } else {
                        arg1
                    };
                    let d2 = if is_hive_spark {
                        Self::ensure_cast_date(arg2)
                    } else {
                        arg2
                    };
                    let unit =
                        Expression::Identifier(Identifier::new(&unit_str));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, d1, d2],
                    ))))
                }
            }
        }
        // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
        "DATEDIFF" if f.args.len() == 2 => {
            // In this form arg0 is the end date and arg1 the start date.
            let mut args = f.args;
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);

            // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
            // Also recognizes TryCast/Cast to DATE that may have been produced by
            // cross-dialect TO_DATE -> TRY_CAST conversion
            let unwrap_to_date = |e: Expression| -> (Expression, bool) {
                if let Expression::Function(ref f) = e {
                    if f.name.eq_ignore_ascii_case("TO_DATE")
                        && f.args.len() == 1
                    {
                        return (f.args[0].clone(), true);
                    }
                }
                // Also recognize TryCast(x, Date) as an already-converted TO_DATE
                if let Expression::TryCast(ref c) = e {
                    if matches!(c.to, DataType::Date) {
                        return (e, true); // Already properly cast, return as-is
                    }
                }
                (e, false)
            };

            match target {
                DialectType::DuckDB => {
                    // For Hive source, always CAST to DATE
                    // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
                    // cast_d0 is built from arg1 (start) and cast_d1 from
                    // arg0 (end) so DATE_DIFF('DAY', start, end) preserves
                    // the sign of Hive's DATEDIFF(end, start).
                    let cast_d0 = if matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                        let (inner, was_to_date) = unwrap_to_date(arg1);
                        if was_to_date {
                            // Already a date expression, use directly
                            if matches!(&inner, Expression::TryCast(_)) {
                                inner // Already TRY_CAST(x AS DATE)
                            } else {
                                Self::try_cast_date(inner)
                            }
                        } else {
                            Self::force_cast_date(inner)
                        }
                    } else {
                        Self::ensure_cast_date(arg1)
                    };
                    let cast_d1 = if matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                        let (inner, was_to_date) = unwrap_to_date(arg0);
                        if was_to_date {
                            if matches!(&inner, Expression::TryCast(_)) {
                                inner
                            } else {
                                Self::try_cast_date(inner)
                            }
                        } else {
                            Self::force_cast_date(inner)
                        }
                    } else {
                        Self::ensure_cast_date(arg0)
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(),
                        vec![Expression::string("DAY"), cast_d0, cast_d1],
                    ))))
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // For Hive/Spark source, apply double_cast_timestamp_date
                    // For other sources (MySQL etc.), just swap args without casting
                    if matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                        let cast_fn = |e: Expression| -> Expression {
                            let (inner, was_to_date) = unwrap_to_date(e);
                            if was_to_date {
                                // NOTE(review): double_cast_timestamp_date is
                                // applied twice in this branch (nesting the
                                // cast); correct only if the helper is
                                // idempotent or the extra layer is wanted -
                                // confirm.
                                let first_cast =
                                    Self::double_cast_timestamp_date(inner);
                                Self::double_cast_timestamp_date(first_cast)
                            } else {
                                Self::double_cast_timestamp_date(inner)
                            }
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::string("DAY"),
                                cast_fn(arg1),
                                cast_fn(arg0),
                            ],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), arg1, arg0],
                        ))))
                    }
                }
                DialectType::Redshift => {
                    // Redshift: DATEDIFF(DAY, start, end) with identifier unit.
                    let unit = Expression::Identifier(Identifier::new("DAY"));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![unit, arg1, arg0],
                    ))))
                }
                // All other targets keep the 2-arg (end, start) shape.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![arg0, arg1],
                )))),
            }
        }
// DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style).
// Rewrites the 3-arg form for the target dialect; the only differences between
// targets are the function name (DATE_DIFF vs DATEDIFF) and whether the unit is
// rendered as a string literal or a bare identifier.
"DATE_DIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // start
    let arg2 = args.remove(0); // end
    // Extract the unit name from either a string literal or an identifier.
    let unit_str = Self::get_unit_str_static(&arg0);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_DIFF('UNIT', start, end) - unit as string literal
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: same shape as DuckDB, unit as string literal
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: DATEDIFF(UNIT, start, end) - no underscore,
            // identifier unit
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // Default: DATEDIFF(UNIT, start, end) with identifier unit
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATEADD(unit, val, date) - 3-arg form (TSQL/Snowflake/Redshift style).
// Normalizes TSQL unit abbreviations, then rewrites per target dialect.
// Several targets additionally CAST string-literal date arguments so the
// generated SQL type-checks (which source dialects accept natively varies).
"DATEADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // amount to add
    let arg2 = args.remove(0); // date/timestamp expression
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    // (e.g. YY -> YEAR, MI -> MINUTE); unknown units pass through unchanged.
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: keep DATEADD(UNIT, val, date) as-is, no casting
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake: DuckDB intervals
            // have no nanosecond unit, so go through epoch-nanosecond math.
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                // Cast string literal to TIMESTAMP so the addition type-checks
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Multiply the amount by a constant factor, constant-folding
                // when the amount is an integer literal (e.g. 2 years -> 24
                // months), otherwise emitting `expr * factor`.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                // Re-normalize here as well, in case the earlier TSQL
                // abbreviation pass left a raw unit through.
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    "YEAR" => {
                        // 1 year == 12 months
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        // 1 quarter == 3 months
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    "WEEK" => {
                        // 1 week == 7 days
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    _ => {
                        // Sub-day units: keep 3-arg DATE_ADD(UNIT, val, date)
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                // Hive: ADD_MONTHS(date, val)
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            // NOTE(review): all non-MONTH units fall through to 2-arg
            // DATE_ADD(date, val), which in Hive adds *days* — the unit is
            // dropped. Looks intentional for DAY but lossy for e.g. YEAR;
            // confirm against the Hive test fixtures.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            // Presto family: DATE_ADD('unit', val, date) with string unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: dedicated AST node rendering DATE_ADD(date, INTERVAL val UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            // PostgreSQL: date + INTERVAL 'val unit' (amount baked into the
            // interval string, so val must stringify via expr_to_string_static)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        _ => {
            // Default: keep DATEADD(UNIT, val, date) unchanged
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
// or (date, val, 'UNIT') from Generic canonical form.
// First detects which argument order is in use, normalizes to
// (unit, val, date), then rewrites for the target dialect.
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name
    let arg2_unit = match &arg2 {
        Expression::Literal(Literal::String(s)) => {
            let u = s.to_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match
    let arg1 = val;
    let arg2 = date;

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('unit', val, date) with string unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val UNIT
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '1 DAY'
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            // DATEADD(UNIT, val, date) with identifier unit
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit.
            // NOTE(review): non-numeric amounts default to "1" here, which
            // silently changes the offset — confirm this fallback is intended.
            let amount_str = match &arg1 {
                Expression::Literal(Literal::Number(n)) => n.clone(),
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY, or DATE_ADD(UNIT, val, date)
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Databricks => {
            // Databricks: keep 3-arg DATE_ADD(UNIT, val, date)
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            // NOTE(review): the unit is dropped for every unit here, not just
            // DAY — verify against Hive fixtures.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        _ => {
            // Default: 3-arg DATE_ADD(UNIT, val, date)
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days).
// Only fires when the source dialect uses the 2-arg day-based form; the
// implied unit is always DAY. Several targets wrap string-literal dates in
// double casts (via the double_cast_* helpers) so the output type-checks.
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
            // so the interval amount binds correctly
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days, date), no casting needed
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            // Presto family: DATE_ADD('DAY', days, date) with string unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
            // (applied unconditionally for these sources, not only for string literals)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: dedicated AST node -> DATE_ADD(date, INTERVAL days DAY)
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'days DAY' (amount baked into string)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days).
// Targets without a native DATE_SUB are rewritten as DATEADD/DATE_ADD with
// the day count negated (days * -1).
"DATE_SUB"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    // Helper to create days * -1 (negate the offset for DATEADD-style targets)
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Literal::Number("-1".to_string())),
        )))
    };
    // String-literal dates need an explicit cast on most targets
    let is_string_literal =
        matches!(date, Expression::Literal(Literal::String(_)));
    match target {
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
            // (the negated amount is parenthesized so it binds correctly)
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(DAY, days * -1, date), double-casting
            // string-literal dates through TIMESTAMP to DATE
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days * -1, date), no casting
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: DATEADD(DAY, days * -1, date), double-casting string
            // literals through DATETIME2 to DATE
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('DAY', days * -1, date)
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), neg, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY), string
            // literals double-cast through DATETIME to DATE
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_SUB".to_string(),
            vec![date, days],
        )))),
    }
}
// ADD_MONTHS(date, val) -> target-specific month-addition form.
// Targets without native ADD_MONTHS get DATEADD(MONTH, ...) or an
// INTERVAL-based rewrite.
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0);
    let val = args.remove(0);
    match target {
        DialectType::TSQL => {
            // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    cast_date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val MONTH
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake (round-trip fidelity);
            // otherwise normalize to DATEADD(MONTH, val, date)
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        val,
                        date,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(MONTH, val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('MONTH', val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("MONTH"), val, date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val MONTH)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        )))),
    }
}
            // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets.
            // Note the unit is rendered as a bare identifier for TSQL/Fabric but
            // as a string literal for every other target.
            "DATETRUNC" if f.args.len() == 2 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                // Normalized textual unit name extracted from the first argument
                // (presumably handles both identifier and string forms — TODO confirm
                // against get_unit_str_static).
                let unit_str = Self::get_unit_str_static(&arg0);
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETRUNC".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(&unit_str)),
                                arg1,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
                        let date = Self::ensure_cast_timestamp(arg1);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), date],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse: dateTrunc('UNIT', expr) — camelCase function name
                        Ok(Expression::Function(Box::new(Function::new(
                            "dateTrunc".to_string(),
                            vec![Expression::string(&unit_str), arg1],
                        ))))
                    }
                    _ => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        let unit = Expression::string(&unit_str);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![unit, arg1],
                        ))))
                    }
                }
            }
            // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets.
            // TSQL keeps the original call; Redshift gets a fresh zero-arg GETDATE()
            // (rebuilt so any source-side quirks on `f` are dropped).
            "GETDATE" if f.args.is_empty() => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                DialectType::Redshift => Ok(Expression::Function(Box::new(
                    Function::new("GETDATE".to_string(), vec![]),
                ))),
                _ => Ok(Expression::CurrentTimestamp(
                    crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: false,
                    },
                )),
            },
            // TO_HEX(x) / HEX(x) -> target-specific hex encode function.
            // The fallback `&f.name` keeps the source's original spelling for
            // targets with no explicit mapping.
            "TO_HEX" | "HEX" if f.args.len() == 1 => {
                let name = match target {
                    DialectType::Presto | DialectType::Trino => "TO_HEX",
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => "HEX",
                    DialectType::DuckDB
                    | DialectType::PostgreSQL
                    | DialectType::Redshift => "TO_HEX",
                    _ => &f.name,
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
14022 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14023 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14024 match target {
14025 DialectType::BigQuery => {
14026 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14027 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14028 // because BigQuery MD5 returns BYTES, not hex string
14029 let arg = &f.args[0];
14030 let wrapped_arg = match arg {
14031 Expression::Function(inner_f)
14032 if inner_f.name.to_uppercase() == "MD5"
14033 || inner_f.name.to_uppercase() == "SHA1"
14034 || inner_f.name.to_uppercase() == "SHA256"
14035 || inner_f.name.to_uppercase() == "SHA512" =>
14036 {
14037 // Wrap hash function in TO_HEX for BigQuery
14038 Expression::Function(Box::new(Function::new(
14039 "TO_HEX".to_string(),
14040 vec![arg.clone()],
14041 )))
14042 }
14043 _ => f.args.into_iter().next().unwrap(),
14044 };
14045 Ok(Expression::Function(Box::new(Function::new(
14046 "FROM_HEX".to_string(),
14047 vec![wrapped_arg],
14048 ))))
14049 }
14050 _ => {
14051 let name = match target {
14052 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14053 DialectType::Spark
14054 | DialectType::Databricks
14055 | DialectType::Hive => "UNHEX",
14056 _ => &f.name,
14057 };
14058 Ok(Expression::Function(Box::new(Function::new(
14059 name.to_string(),
14060 f.args,
14061 ))))
14062 }
14063 }
14064 }
            // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark/Databricks; other targets
            // keep the call unchanged.
            "TO_UTF8" if f.args.len() == 1 => match target {
                DialectType::Spark | DialectType::Databricks => {
                    let mut args = f.args;
                    args.push(Expression::string("utf-8"));
                    Ok(Expression::Function(Box::new(Function::new(
                        "ENCODE".to_string(),
                        args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark/Databricks; mirror of
            // the TO_UTF8 arm above.
            "FROM_UTF8" if f.args.len() == 1 => match target {
                DialectType::Spark | DialectType::Databricks => {
                    let mut args = f.args;
                    args.push(Expression::string("utf-8"));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DECODE".to_string(),
                        args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific spelling;
            // unknown targets keep the source's original name.
            "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "STARTSWITH",
                    DialectType::Presto | DialectType::Trino => "STARTS_WITH",
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        "STARTS_WITH"
                    }
                    _ => &f.name,
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
14104 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14105 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14106 let name = match target {
14107 DialectType::Presto
14108 | DialectType::Trino
14109 | DialectType::Athena => "APPROX_DISTINCT",
14110 _ => "APPROX_COUNT_DISTINCT",
14111 };
14112 Ok(Expression::Function(Box::new(Function::new(
14113 name.to_string(),
14114 f.args,
14115 ))))
14116 }
            // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive targets.
            // BigQuery sources are excluded because BigQuery JSON_EXTRACT has
            // different path semantics — presumably handled elsewhere (TODO confirm).
            "JSON_EXTRACT"
                if f.args.len() == 2
                    && !matches!(source, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                Ok(Expression::Function(Box::new(Function::new(
                    "GET_JSON_OBJECT".to_string(),
                    f.args,
                ))))
            }
            // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax).
            // Remove the path (index 1) first so index 0 is still the subject.
            "JSON_EXTRACT"
                if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
            {
                let mut args = f.args;
                let path = args.remove(1);
                let this = args.remove(0);
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this,
                        path,
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
            // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery,
            // CAST(TO_JSON(x) AS TEXT) for DuckDB; other targets keep JSON_FORMAT.
            "JSON_FORMAT" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
                        // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
                        // The literal is wrapped in [...] so SCHEMA_OF_JSON accepts
                        // scalar payloads; the regex then strips the added brackets.
                        if matches!(
                            source,
                            DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena
                        ) {
                            if let Some(Expression::ParseJson(pj)) = f.args.first()
                            {
                                if let Expression::Literal(Literal::String(s)) =
                                    &pj.this
                                {
                                    let wrapped = Expression::Literal(
                                        Literal::String(format!("[{}]", s)),
                                    );
                                    let schema_of_json = Expression::Function(
                                        Box::new(Function::new(
                                            "SCHEMA_OF_JSON".to_string(),
                                            vec![wrapped.clone()],
                                        )),
                                    );
                                    let from_json = Expression::Function(Box::new(
                                        Function::new(
                                            "FROM_JSON".to_string(),
                                            vec![wrapped, schema_of_json],
                                        ),
                                    ));
                                    let to_json = Expression::Function(Box::new(
                                        Function::new(
                                            "TO_JSON".to_string(),
                                            vec![from_json],
                                        ),
                                    ));
                                    // Early return: this path bypasses the
                                    // CAST/TO_JSON stripping below.
                                    return Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "REGEXP_EXTRACT".to_string(),
                                            vec![
                                                to_json,
                                                Expression::Literal(
                                                    Literal::String(
                                                        "^.(.*).$".to_string(),
                                                    ),
                                                ),
                                                Expression::Literal(
                                                    Literal::Number(
                                                        "1".to_string(),
                                                    ),
                                                ),
                                            ],
                                        ),
                                    )));
                                }
                            }
                        }

                        // Strip inner CAST(... AS JSON) or TO_JSON() if present.
                        // The CastToJsonForSpark pass may have already converted
                        // CAST(x AS JSON) to TO_JSON(x); avoid double wrapping.
                        let mut args = f.args;
                        if let Some(Expression::Cast(ref c)) = args.first() {
                            if matches!(&c.to, DataType::Json | DataType::JsonB) {
                                args = vec![c.this.clone()];
                            }
                        } else if let Some(Expression::Function(ref inner_f)) =
                            args.first()
                        {
                            if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                                && inner_f.args.len() == 1
                            {
                                // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                                args = inner_f.args.clone();
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(
                        Function::new("TO_JSON_STRING".to_string(), f.args),
                    ))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let to_json = Expression::Function(Box::new(
                            Function::new("TO_JSON".to_string(), f.args),
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SYSDATE: Oracle/Redshift keep it as-is, Snowflake needs parens,
            // DuckDB gets CURRENT_TIMESTAMP AT TIME ZONE 'UTC', and every other
            // target gets CURRENT_TIMESTAMP (with sysdate provenance flagged).
            "SYSDATE" if f.args.is_empty() => {
                match target {
                    DialectType::Oracle | DialectType::Redshift => {
                        Ok(Expression::Function(f))
                    }
                    DialectType::Snowflake => {
                        // Snowflake uses SYSDATE() with parens
                        let mut f = *f;
                        f.no_parens = false;
                        Ok(Expression::Function(Box::new(f)))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: Expression::CurrentTimestamp(
                                    crate::expressions::CurrentTimestamp {
                                        precision: None,
                                        sysdate: false,
                                    },
                                ),
                                zone: Expression::Literal(Literal::String(
                                    "UTC".to_string(),
                                )),
                            },
                        )))
                    }
                    _ => Ok(Expression::CurrentTimestamp(
                        // sysdate: true records that this CURRENT_TIMESTAMP came
                        // from SYSDATE — presumably used by generators that can
                        // round-trip it (TODO confirm).
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: true,
                        },
                    )),
                }
            }
            // LOGICAL_OR(x) -> BOOL_OR(x) for Spark/Databricks only; other targets
            // keep the source spelling.
            "LOGICAL_OR" if f.args.len() == 1 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "BOOL_OR",
                    _ => &f.name,
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // LOGICAL_AND(x) -> BOOL_AND(x) — mirror of the LOGICAL_OR arm above.
            "LOGICAL_AND" if f.args.len() == 1 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "BOOL_AND",
                    _ => &f.name,
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle (argument order is
            // the same, only the name changes).
            "MONTHS_ADD" if f.args.len() == 2 => match target {
                DialectType::Oracle => Ok(Expression::Function(Box::new(
                    Function::new("ADD_MONTHS".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific.
            "ARRAY_JOIN" if f.args.len() >= 2 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
                        Ok(Expression::Function(f))
                    }
                    DialectType::Hive => {
                        // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr);
                        // note the argument order flips and null_replacement is
                        // silently dropped (Hive CONCAT_WS has no equivalent).
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let sep = args.remove(0);
                        // Drop any remaining args (null_replacement)
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONCAT_WS".to_string(),
                            vec![sep, arr],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino also support the 3-arg form natively.
                        Ok(Expression::Function(f))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LOCATE(substr, str, pos) 3-arg -> target-specific.
            // For Presto/DuckDB: STRPOS doesn't support 3-arg, so expand to
            //   STRPOS(SUBSTRING(str, pos), substr) adjusted back to an absolute
            //   index, with a not-found (= 0) guard.
            "LOCATE"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::DuckDB
                    ) =>
            {
                let mut args = f.args;
                let substr = args.remove(0);
                let string = args.remove(0);
                let pos = args.remove(0);
                // STRPOS(SUBSTRING(string, pos), substr) — position within the suffix
                let substring_call = Expression::Function(Box::new(Function::new(
                    "SUBSTRING".to_string(),
                    vec![string.clone(), pos.clone()],
                )));
                let strpos_call = Expression::Function(Box::new(Function::new(
                    "STRPOS".to_string(),
                    vec![substring_call, substr.clone()],
                )));
                // STRPOS(...) + pos - 1 — convert suffix-relative hit back to an
                // absolute 1-based position
                let pos_adjusted =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ),
                        )),
                        Expression::number(1),
                    )));
                // STRPOS(...) = 0 — not-found sentinel must stay 0, not pos - 1
                let is_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
                        Ok(Expression::Function(Box::new(Function::new(
                            "IF".to_string(),
                            vec![is_zero, Expression::number(0), pos_adjusted],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(is_zero, Expression::number(0))],
                            else_: Some(pos_adjusted),
                            comments: Vec::new(),
                        })))
                    }
                    // Unreachable given the outer guard, but kept for exhaustiveness.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LOCATE".to_string(),
                        vec![substr, string, pos],
                    )))),
                }
            }
            // STRPOS(haystack, needle, occurrence) 3-arg ->
            // INSTR(haystack, needle, 1, occurrence): the extra literal 1 is the
            // start position INSTR requires before its occurrence argument.
            "STRPOS"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::BigQuery
                            | DialectType::Oracle
                            | DialectType::Teradata
                    ) =>
            {
                let mut args = f.args;
                let haystack = args.remove(0);
                let needle = args.remove(0);
                let occurrence = args.remove(0);
                Ok(Expression::Function(Box::new(Function::new(
                    "INSTR".to_string(),
                    vec![haystack, needle, Expression::number(1), occurrence],
                ))))
            }
            // SCHEMA_NAME(id) -> target-specific current-schema lookup.
            // NOTE(review): any id argument is dropped for MySQL/SQLite targets —
            // presumably acceptable because callers pass none; confirm.
            "SCHEMA_NAME" if f.args.len() <= 1 => match target {
                DialectType::MySQL | DialectType::SingleStore => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "SCHEMA".to_string(),
                        vec![],
                    ))))
                }
                DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
                    crate::expressions::CurrentSchema { this: None },
                ))),
                // SQLite's primary database is always named 'main'.
                DialectType::SQLite => Ok(Expression::string("main")),
                _ => Ok(Expression::Function(f)),
            },
            // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto.
            "STRTOL" if f.args.len() == 2 => match target {
                DialectType::Presto | DialectType::Trino => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "FROM_BASE".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark/Databricks.
            "EDITDIST3" if f.args.len() == 2 => match target {
                DialectType::Spark | DialectType::Databricks => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "LEVENSHTEIN".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num),
            // i.e. thousands separators plus a fixed decimal count.
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::MySQL | DialectType::SingleStore
                    )
                    && matches!(target, DialectType::DuckDB) =>
            {
                let mut args = f.args;
                let num_expr = args.remove(0);
                let decimals_expr = args.remove(0);
                // Extract decimal count from a numeric literal.
                // NOTE(review): a non-literal decimals expression silently falls
                // back to 0 decimals — lossy; confirm this is intended.
                let dec_count = match &decimals_expr {
                    Expression::Literal(Literal::Number(n)) => n.clone(),
                    _ => "0".to_string(),
                };
                let fmt_str = format!("{{:,.{}f}}", dec_count);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![Expression::string(&fmt_str), num_expr],
                ))))
            }
            // FORMAT(x, fmt) from TSQL -> DATE_FORMAT/FORMAT_NUMBER for Spark,
            // or expand .NET single-char shortcodes for other targets.
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
            {
                let val_expr = f.args[0].clone();
                let fmt_expr = f.args[1].clone();
                // Expand unambiguous .NET single-char date format shortcodes to full patterns.
                // Only expand shortcodes that are NOT also valid numeric format specifiers.
                // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
                // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
                let (expanded_fmt, is_shortcode) = match &fmt_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        match s.as_str() {
                            "m" | "M" => (Expression::string("MMMM d"), true),
                            "t" => (Expression::string("h:mm tt"), true),
                            "T" => (Expression::string("h:mm:ss tt"), true),
                            "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                            _ => (fmt_expr.clone(), false),
                        }
                    }
                    _ => (fmt_expr.clone(), false),
                };
                // Heuristic: treat the format as a date format when it contains
                // common date pattern fragments (or was a date shortcode).
                let is_date_format = is_shortcode
                    || match &expanded_fmt {
                        Expression::Literal(
                            crate::expressions::Literal::String(s),
                        ) => {
                            // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                            s.contains("yyyy")
                                || s.contains("YYYY")
                                || s.contains("MM")
                                || s.contains("dd")
                                || s.contains("MMMM")
                                || s.contains("HH")
                                || s.contains("hh")
                                || s.contains("ss")
                        }
                        _ => false,
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Date-like formats map to DATE_FORMAT, everything else to
                        // FORMAT_NUMBER.
                        let func_name = if is_date_format {
                            "DATE_FORMAT"
                        } else {
                            "FORMAT_NUMBER"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![val_expr, expanded_fmt],
                        ))))
                    }
                    _ => {
                        // For TSQL and other targets, expand shortcodes but keep FORMAT
                        if is_shortcode {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT".to_string(),
                                vec![val_expr, expanded_fmt],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                }
            }
            // FORMAT('%s', x) from Trino/Presto -> target-specific printf variant.
            "FORMAT"
                if f.args.len() >= 2
                    && matches!(
                        source,
                        DialectType::Trino
                            | DialectType::Presto
                            | DialectType::Athena
                    ) =>
            {
                let fmt_expr = f.args[0].clone();
                let value_args: Vec<Expression> = f.args[1..].to_vec();
                match target {
                    // DuckDB: replace printf-style %s with fmt-style {} in the
                    // format string (literal formats only; dynamic formats pass
                    // through unchanged).
                    DialectType::DuckDB => {
                        let new_fmt = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => {
                                Expression::Literal(Literal::String(
                                    s.replace("%s", "{}"),
                                ))
                            }
                            _ => fmt_expr,
                        };
                        let mut args = vec![new_fmt];
                        args.extend(value_args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(),
                            args,
                        ))))
                    }
                    // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) only in the trivial
                    // single-%s case; anything else is left untouched.
                    DialectType::Snowflake => match &fmt_expr {
                        Expression::Literal(Literal::String(s))
                            if s == "%s" && value_args.len() == 1 =>
                        {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                value_args,
                            ))))
                        }
                        _ => Ok(Expression::Function(f)),
                    },
                    // Default: keep FORMAT as-is
                    _ => Ok(Expression::Function(f)),
                }
            }
14604 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
14605 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
14606 if f.args.len() == 2 =>
14607 {
14608 match target {
14609 DialectType::PostgreSQL | DialectType::Redshift => {
14610 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
14611 let arr = f.args[0].clone();
14612 let needle = f.args[1].clone();
14613 // Convert [] to ARRAY[] for PostgreSQL
14614 let pg_arr = match arr {
14615 Expression::Array(a) => Expression::ArrayFunc(
14616 Box::new(crate::expressions::ArrayConstructor {
14617 expressions: a.expressions,
14618 bracket_notation: false,
14619 use_list_keyword: false,
14620 }),
14621 ),
14622 _ => arr,
14623 };
14624 // needle = ANY(arr) using the Any quantified expression
14625 let any_expr = Expression::Any(Box::new(
14626 crate::expressions::QuantifiedExpr {
14627 this: needle.clone(),
14628 subquery: pg_arr,
14629 op: Some(crate::expressions::QuantifiedOp::Eq),
14630 },
14631 ));
14632 let coalesce = Expression::Coalesce(Box::new(
14633 crate::expressions::VarArgFunc {
14634 expressions: vec![
14635 any_expr,
14636 Expression::Boolean(
14637 crate::expressions::BooleanLiteral {
14638 value: false,
14639 },
14640 ),
14641 ],
14642 original_name: None,
14643 },
14644 ));
14645 let is_null_check = Expression::IsNull(Box::new(
14646 crate::expressions::IsNull {
14647 this: needle,
14648 not: false,
14649 postfix_form: false,
14650 },
14651 ));
14652 Ok(Expression::Case(Box::new(Case {
14653 operand: None,
14654 whens: vec![(
14655 is_null_check,
14656 Expression::Null(crate::expressions::Null),
14657 )],
14658 else_: Some(coalesce),
14659 comments: Vec::new(),
14660 })))
14661 }
14662 _ => Ok(Expression::Function(Box::new(Function::new(
14663 "ARRAY_CONTAINS".to_string(),
14664 f.args,
14665 )))),
14666 }
14667 }
            // LIST_HAS_ANY / ARRAY_HAS_ANY -> array-overlap (&&) operator for
            // PostgreSQL-family and DuckDB targets; other targets keep LIST_HAS_ANY.
            "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // arr1 && arr2, converting bare [] literals to ARRAY[] syntax
                        let mut args = f.args;
                        let arr1 = args.remove(0);
                        let arr2 = args.remove(0);
                        let pg_arr1 = match arr1 {
                            Expression::Array(a) => Expression::ArrayFunc(
                                Box::new(crate::expressions::ArrayConstructor {
                                    expressions: a.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                }),
                            ),
                            _ => arr1,
                        };
                        let pg_arr2 = match arr2 {
                            Expression::Array(a) => Expression::ArrayFunc(
                                Box::new(crate::expressions::ArrayConstructor {
                                    expressions: a.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                }),
                            ),
                            _ => arr2,
                        };
                        Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                            pg_arr1, pg_arr2,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: arr1 && arr2 (native support, no ARRAY[] rewrite needed)
                        let mut args = f.args;
                        let arr1 = args.remove(0);
                        let arr2 = args.remove(0);
                        Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                            arr1, arr2,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LIST_HAS_ANY".to_string(),
                        f.args,
                    )))),
                }
            }
14715 // APPROX_QUANTILE(x, q) -> target-specific
14716 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
14717 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14718 Function::new("APPROX_PERCENTILE".to_string(), f.args),
14719 ))),
14720 DialectType::DuckDB => Ok(Expression::Function(f)),
14721 _ => Ok(Expression::Function(f)),
14722 },
14723 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
14724 "MAKE_DATE" if f.args.len() == 3 => match target {
14725 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14726 Function::new("DATE".to_string(), f.args),
14727 ))),
14728 _ => Ok(Expression::Function(f)),
14729 },
14730 // RANGE(start, end[, step]) -> target-specific
14731 "RANGE"
14732 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
14733 {
14734 let start = f.args[0].clone();
14735 let end = f.args[1].clone();
14736 let step = f.args.get(2).cloned();
14737 match target {
14738 DialectType::Spark | DialectType::Databricks => {
14739 // RANGE(start, end) -> SEQUENCE(start, end-1)
14740 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
14741 // RANGE(start, start) -> ARRAY() (empty)
14742 // RANGE(start, end, 0) -> ARRAY() (empty)
14743 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
14744
14745 // Check for constant args
14746 fn extract_i64(e: &Expression) -> Option<i64> {
14747 match e {
14748 Expression::Literal(Literal::Number(n)) => {
14749 n.parse::<i64>().ok()
14750 }
14751 Expression::Neg(u) => {
14752 if let Expression::Literal(Literal::Number(n)) =
14753 &u.this
14754 {
14755 n.parse::<i64>().ok().map(|v| -v)
14756 } else {
14757 None
14758 }
14759 }
14760 _ => None,
14761 }
14762 }
14763 let start_val = extract_i64(&start);
14764 let end_val = extract_i64(&end);
14765 let step_val = step.as_ref().and_then(|s| extract_i64(s));
14766
14767 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
14768 if step_val == Some(0) {
14769 return Ok(Expression::Function(Box::new(
14770 Function::new("ARRAY".to_string(), vec![]),
14771 )));
14772 }
14773 if let (Some(s), Some(e_val)) = (start_val, end_val) {
14774 if s == e_val {
14775 return Ok(Expression::Function(Box::new(
14776 Function::new("ARRAY".to_string(), vec![]),
14777 )));
14778 }
14779 }
14780
14781 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
14782 // All constants - compute new end = end - step (if step provided) or end - 1
14783 match step_val {
14784 Some(st) if st < 0 => {
14785 // Negative step: SEQUENCE(start, end - step, step)
14786 let new_end = e_val - st; // end - step (= end + |step|)
14787 let mut args =
14788 vec![start, Expression::number(new_end)];
14789 if let Some(s) = step {
14790 args.push(s);
14791 }
14792 Ok(Expression::Function(Box::new(
14793 Function::new("SEQUENCE".to_string(), args),
14794 )))
14795 }
14796 Some(st) => {
14797 let new_end = e_val - st;
14798 let mut args =
14799 vec![start, Expression::number(new_end)];
14800 if let Some(s) = step {
14801 args.push(s);
14802 }
14803 Ok(Expression::Function(Box::new(
14804 Function::new("SEQUENCE".to_string(), args),
14805 )))
14806 }
14807 None => {
14808 // No step: SEQUENCE(start, end - 1)
14809 let new_end = e_val - 1;
14810 Ok(Expression::Function(Box::new(
14811 Function::new(
14812 "SEQUENCE".to_string(),
14813 vec![
14814 start,
14815 Expression::number(new_end),
14816 ],
14817 ),
14818 )))
14819 }
14820 }
14821 } else {
14822 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
14823 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
14824 end.clone(),
14825 Expression::number(1),
14826 )));
14827 let cond = Expression::Lte(Box::new(BinaryOp::new(
14828 Expression::Paren(Box::new(Paren {
14829 this: end_m1.clone(),
14830 trailing_comments: Vec::new(),
14831 })),
14832 start.clone(),
14833 )));
14834 let empty = Expression::Function(Box::new(
14835 Function::new("ARRAY".to_string(), vec![]),
14836 ));
14837 let mut seq_args = vec![
14838 start,
14839 Expression::Paren(Box::new(Paren {
14840 this: end_m1,
14841 trailing_comments: Vec::new(),
14842 })),
14843 ];
14844 if let Some(s) = step {
14845 seq_args.push(s);
14846 }
14847 let seq = Expression::Function(Box::new(
14848 Function::new("SEQUENCE".to_string(), seq_args),
14849 ));
14850 Ok(Expression::IfFunc(Box::new(
14851 crate::expressions::IfFunc {
14852 condition: cond,
14853 true_value: empty,
14854 false_value: Some(seq),
14855 original_name: None,
14856 },
14857 )))
14858 }
14859 }
14860 DialectType::SQLite => {
14861 // RANGE(start, end) -> GENERATE_SERIES(start, end)
14862 // The subquery wrapping is handled at the Alias level
14863 let mut args = vec![start, end];
14864 if let Some(s) = step {
14865 args.push(s);
14866 }
14867 Ok(Expression::Function(Box::new(Function::new(
14868 "GENERATE_SERIES".to_string(),
14869 args,
14870 ))))
14871 }
14872 _ => Ok(Expression::Function(f)),
14873 }
14874 }
            // NOTE(review): ARRAY_REVERSE_SORT is handled in an earlier arm;
            // a DuckDB self-normalization case was apparently planned here but
            // never added — confirm whether it is still needed.
            // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction.
            "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
                DialectType::Snowflake => Ok(Expression::Function(Box::new(
                    Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
                ))),
                // Spark/Databricks support MAP_FROM_ARRAYS natively.
                DialectType::Spark | DialectType::Databricks => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAP_FROM_ARRAYS".to_string(),
                        f.args,
                    ))))
                }
                // Everything else gets the generic MAP(keys, values) form.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                )))),
            },
            // VARIANCE(x) -> varSamp(x) for ClickHouse (sample variance, matching
            // standard SQL VARIANCE semantics).
            "VARIANCE" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => Ok(Expression::Function(Box::new(
                    Function::new("varSamp".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // STDDEV(x) -> stddevSamp(x) for ClickHouse — mirror of VARIANCE above.
            "STDDEV" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => Ok(Expression::Function(Box::new(
                    Function::new("stddevSamp".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ISINF(x) -> IS_INF(x) for BigQuery.
            "ISINF" if f.args.len() == 1 => match target {
                DialectType::BigQuery => Ok(Expression::Function(Box::new(
                    Function::new("IS_INF".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive targets.
            "CONTAINS" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CONTAINS".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto.
            // NOTE(review): this arm appears unreachable for 2-arg calls — the
            // earlier "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" arm has the
            // same guard and matches first; confirm and either remove this arm or
            // fold its Presto mapping into that one.
            "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONTAINS".to_string(),
                        f.args,
                    ))))
                }
                // Rebuilt with the canonical uppercase spelling for DuckDB.
                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CONTAINS".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark targets.
            "TO_UNIXTIME" if f.args.len() == 1 => match target {
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => Ok(Expression::Function(Box::new(
                    Function::new("UNIX_TIMESTAMP".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // FROM_UNIXTIME(x) 1-arg -> target-specific epoch-to-timestamp.
            "FROM_UNIXTIME" if f.args.len() == 1 => {
                match target {
                    // These targets support FROM_UNIXTIME natively.
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Presto
                    | DialectType::Trino => Ok(Expression::Function(f)),
                    DialectType::DuckDB => {
                        // DuckDB: TO_TIMESTAMP(x)
                        let arg = f.args.into_iter().next().unwrap();
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![arg],
                        ))))
                    }
                    DialectType::PostgreSQL => {
                        // PG: TO_TIMESTAMP(col)
                        let arg = f.args.into_iter().next().unwrap();
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![arg],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift has no TO_TIMESTAMP-from-epoch; build
                        // (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND') instead.
                        let arg = f.args.into_iter().next().unwrap();
                        let epoch_ts = Expression::Literal(Literal::Timestamp(
                            "epoch".to_string(),
                        ));
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string("1 SECOND")),
                                unit: None,
                            },
                        ));
                        let mul =
                            Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
                        let add =
                            Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
                        // Parenthesized so operator precedence survives regeneration.
                        Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                            this: add,
                            trailing_comments: Vec::new(),
                        })))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
14993 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
14994 "FROM_UNIXTIME"
14995 if f.args.len() == 2
14996 && matches!(
14997 source,
14998 DialectType::Hive
14999 | DialectType::Spark
15000 | DialectType::Databricks
15001 ) =>
15002 {
15003 let mut args = f.args;
15004 let unix_ts = args.remove(0);
15005 let fmt_expr = args.remove(0);
15006 match target {
15007 DialectType::DuckDB => {
15008 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
15009 let to_ts = Expression::Function(Box::new(Function::new(
15010 "TO_TIMESTAMP".to_string(),
15011 vec![unix_ts],
15012 )));
15013 if let Expression::Literal(
15014 crate::expressions::Literal::String(s),
15015 ) = &fmt_expr
15016 {
15017 let c_fmt = Self::hive_format_to_c_format(s);
15018 Ok(Expression::Function(Box::new(Function::new(
15019 "STRFTIME".to_string(),
15020 vec![to_ts, Expression::string(&c_fmt)],
15021 ))))
15022 } else {
15023 Ok(Expression::Function(Box::new(Function::new(
15024 "STRFTIME".to_string(),
15025 vec![to_ts, fmt_expr],
15026 ))))
15027 }
15028 }
15029 DialectType::Presto
15030 | DialectType::Trino
15031 | DialectType::Athena => {
15032 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
15033 let from_unix =
15034 Expression::Function(Box::new(Function::new(
15035 "FROM_UNIXTIME".to_string(),
15036 vec![unix_ts],
15037 )));
15038 if let Expression::Literal(
15039 crate::expressions::Literal::String(s),
15040 ) = &fmt_expr
15041 {
15042 let p_fmt = Self::hive_format_to_presto_format(s);
15043 Ok(Expression::Function(Box::new(Function::new(
15044 "DATE_FORMAT".to_string(),
15045 vec![from_unix, Expression::string(&p_fmt)],
15046 ))))
15047 } else {
15048 Ok(Expression::Function(Box::new(Function::new(
15049 "DATE_FORMAT".to_string(),
15050 vec![from_unix, fmt_expr],
15051 ))))
15052 }
15053 }
15054 _ => {
15055 // Keep as FROM_UNIXTIME(x, fmt) for other targets
15056 Ok(Expression::Function(Box::new(Function::new(
15057 "FROM_UNIXTIME".to_string(),
15058 vec![unix_ts, fmt_expr],
15059 ))))
15060 }
15061 }
15062 }
15063 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15064 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15065 let unit_str = Self::get_unit_str_static(&f.args[0]);
15066 // Get the raw unit text preserving original case
15067 let raw_unit = match &f.args[0] {
15068 Expression::Identifier(id) => id.name.clone(),
15069 Expression::Literal(crate::expressions::Literal::String(s)) => {
15070 s.clone()
15071 }
15072 Expression::Column(col) => col.name.name.clone(),
15073 _ => unit_str.clone(),
15074 };
15075 match target {
15076 DialectType::TSQL | DialectType::Fabric => {
15077 // Preserve original case of unit for TSQL
15078 let unit_name = match unit_str.as_str() {
15079 "YY" | "YYYY" => "YEAR".to_string(),
15080 "QQ" | "Q" => "QUARTER".to_string(),
15081 "MM" | "M" => "MONTH".to_string(),
15082 "WK" | "WW" => "WEEK".to_string(),
15083 "DD" | "D" | "DY" => "DAY".to_string(),
15084 "HH" => "HOUR".to_string(),
15085 "MI" | "N" => "MINUTE".to_string(),
15086 "SS" | "S" => "SECOND".to_string(),
15087 _ => raw_unit.clone(), // preserve original case
15088 };
15089 let mut args = f.args;
15090 args[0] =
15091 Expression::Identifier(Identifier::new(&unit_name));
15092 Ok(Expression::Function(Box::new(Function::new(
15093 "DATEPART".to_string(),
15094 args,
15095 ))))
15096 }
15097 DialectType::Spark | DialectType::Databricks => {
15098 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15099 // Preserve original case for non-abbreviation units
15100 let unit = match unit_str.as_str() {
15101 "YY" | "YYYY" => "YEAR".to_string(),
15102 "QQ" | "Q" => "QUARTER".to_string(),
15103 "MM" | "M" => "MONTH".to_string(),
15104 "WK" | "WW" => "WEEK".to_string(),
15105 "DD" | "D" | "DY" => "DAY".to_string(),
15106 "HH" => "HOUR".to_string(),
15107 "MI" | "N" => "MINUTE".to_string(),
15108 "SS" | "S" => "SECOND".to_string(),
15109 _ => raw_unit, // preserve original case
15110 };
15111 Ok(Expression::Extract(Box::new(
15112 crate::expressions::ExtractFunc {
15113 this: f.args[1].clone(),
15114 field: crate::expressions::DateTimeField::Custom(
15115 unit,
15116 ),
15117 },
15118 )))
15119 }
15120 _ => Ok(Expression::Function(Box::new(Function::new(
15121 "DATE_PART".to_string(),
15122 f.args,
15123 )))),
15124 }
15125 }
15126 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15127 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15128 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15129 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15130 "DATENAME" if f.args.len() == 2 => {
15131 let unit_str = Self::get_unit_str_static(&f.args[0]);
15132 let date_expr = f.args[1].clone();
15133 match unit_str.as_str() {
15134 "MM" | "M" | "MONTH" => match target {
15135 DialectType::TSQL => {
15136 let cast_date = Expression::Cast(Box::new(
15137 crate::expressions::Cast {
15138 this: date_expr,
15139 to: DataType::Custom {
15140 name: "DATETIME2".to_string(),
15141 },
15142 trailing_comments: Vec::new(),
15143 double_colon_syntax: false,
15144 format: None,
15145 default: None,
15146 },
15147 ));
15148 Ok(Expression::Function(Box::new(Function::new(
15149 "FORMAT".to_string(),
15150 vec![cast_date, Expression::string("MMMM")],
15151 ))))
15152 }
15153 DialectType::Spark | DialectType::Databricks => {
15154 let cast_date = Expression::Cast(Box::new(
15155 crate::expressions::Cast {
15156 this: date_expr,
15157 to: DataType::Timestamp {
15158 timezone: false,
15159 precision: None,
15160 },
15161 trailing_comments: Vec::new(),
15162 double_colon_syntax: false,
15163 format: None,
15164 default: None,
15165 },
15166 ));
15167 Ok(Expression::Function(Box::new(Function::new(
15168 "DATE_FORMAT".to_string(),
15169 vec![cast_date, Expression::string("MMMM")],
15170 ))))
15171 }
15172 _ => Ok(Expression::Function(f)),
15173 },
15174 "DW" | "WEEKDAY" => match target {
15175 DialectType::TSQL => {
15176 let cast_date = Expression::Cast(Box::new(
15177 crate::expressions::Cast {
15178 this: date_expr,
15179 to: DataType::Custom {
15180 name: "DATETIME2".to_string(),
15181 },
15182 trailing_comments: Vec::new(),
15183 double_colon_syntax: false,
15184 format: None,
15185 default: None,
15186 },
15187 ));
15188 Ok(Expression::Function(Box::new(Function::new(
15189 "FORMAT".to_string(),
15190 vec![cast_date, Expression::string("dddd")],
15191 ))))
15192 }
15193 DialectType::Spark | DialectType::Databricks => {
15194 let cast_date = Expression::Cast(Box::new(
15195 crate::expressions::Cast {
15196 this: date_expr,
15197 to: DataType::Timestamp {
15198 timezone: false,
15199 precision: None,
15200 },
15201 trailing_comments: Vec::new(),
15202 double_colon_syntax: false,
15203 format: None,
15204 default: None,
15205 },
15206 ));
15207 Ok(Expression::Function(Box::new(Function::new(
15208 "DATE_FORMAT".to_string(),
15209 vec![cast_date, Expression::string("EEEE")],
15210 ))))
15211 }
15212 _ => Ok(Expression::Function(f)),
15213 },
15214 _ => Ok(Expression::Function(f)),
15215 }
15216 }
15217 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15218 "STRING_AGG" if f.args.len() >= 2 => {
15219 let x = f.args[0].clone();
15220 let sep = f.args[1].clone();
15221 match target {
15222 DialectType::MySQL
15223 | DialectType::SingleStore
15224 | DialectType::Doris
15225 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15226 Box::new(crate::expressions::GroupConcatFunc {
15227 this: x,
15228 separator: Some(sep),
15229 order_by: None,
15230 distinct: false,
15231 filter: None,
15232 }),
15233 )),
15234 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15235 crate::expressions::GroupConcatFunc {
15236 this: x,
15237 separator: Some(sep),
15238 order_by: None,
15239 distinct: false,
15240 filter: None,
15241 },
15242 ))),
15243 DialectType::PostgreSQL | DialectType::Redshift => {
15244 Ok(Expression::StringAgg(Box::new(
15245 crate::expressions::StringAggFunc {
15246 this: x,
15247 separator: Some(sep),
15248 order_by: None,
15249 distinct: false,
15250 filter: None,
15251 limit: None,
15252 },
15253 )))
15254 }
15255 _ => Ok(Expression::Function(f)),
15256 }
15257 }
15258 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
15259 "JSON_ARRAYAGG" => match target {
15260 DialectType::PostgreSQL => {
15261 Ok(Expression::Function(Box::new(Function {
15262 name: "JSON_AGG".to_string(),
15263 ..(*f)
15264 })))
15265 }
15266 _ => Ok(Expression::Function(f)),
15267 },
15268 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
15269 "SCHEMA_NAME" => match target {
15270 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15271 crate::expressions::CurrentSchema { this: None },
15272 ))),
15273 DialectType::SQLite => Ok(Expression::string("main")),
15274 _ => Ok(Expression::Function(f)),
15275 },
15276 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
15277 "TO_TIMESTAMP"
15278 if f.args.len() == 2
15279 && matches!(
15280 source,
15281 DialectType::Spark
15282 | DialectType::Databricks
15283 | DialectType::Hive
15284 )
15285 && matches!(target, DialectType::DuckDB) =>
15286 {
15287 let mut args = f.args;
15288 let val = args.remove(0);
15289 let fmt_expr = args.remove(0);
15290 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15291 // Convert Java/Spark format to C strptime format
fn java_to_c_fmt(fmt: &str) -> String {
    // Convert a Java/Spark datetime format string into a C
    // strptime/strftime format.
    //
    // Phase 1: rewrite multi-character Java tokens into "%x"
    // specifiers. Order matters: longer tokens are replaced first so
    // e.g. "yyyy" is consumed before the trailing "yy" rule can see it.
    // NOTE(review): "EEEE" maps to "%W" here; in C strftime "%W" is the
    // week-of-year number and "%A" is the full weekday name — confirm
    // this matches what the DuckDB generator expects.
    let mapped = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ]
    .iter()
    .fold(fmt.to_string(), |acc, (java, c)| acc.replace(java, c));
    // Phase 2: translate single-letter timezone markers, skipping over
    // already-emitted "%x" pairs so their letters are not re-mapped.
    let mut out = String::new();
    let mut rest = mapped.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            '%' if rest.peek().is_some() => {
                // Copy the full "%x" specifier through verbatim.
                out.push('%');
                out.push(rest.next().unwrap());
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
15324 let c_fmt = java_to_c_fmt(s);
15325 Ok(Expression::Function(Box::new(Function::new(
15326 "STRPTIME".to_string(),
15327 vec![val, Expression::string(&c_fmt)],
15328 ))))
15329 } else {
15330 Ok(Expression::Function(Box::new(Function::new(
15331 "STRPTIME".to_string(),
15332 vec![val, fmt_expr],
15333 ))))
15334 }
15335 }
15336 // TO_DATE(x) 1-arg from Doris: date conversion
15337 "TO_DATE"
15338 if f.args.len() == 1
15339 && matches!(
15340 source,
15341 DialectType::Doris | DialectType::StarRocks
15342 ) =>
15343 {
15344 let arg = f.args.into_iter().next().unwrap();
15345 match target {
15346 DialectType::Oracle
15347 | DialectType::DuckDB
15348 | DialectType::TSQL => {
15349 // CAST(x AS DATE)
15350 Ok(Expression::Cast(Box::new(Cast {
15351 this: arg,
15352 to: DataType::Date,
15353 double_colon_syntax: false,
15354 trailing_comments: vec![],
15355 format: None,
15356 default: None,
15357 })))
15358 }
15359 DialectType::MySQL | DialectType::SingleStore => {
15360 // DATE(x)
15361 Ok(Expression::Function(Box::new(Function::new(
15362 "DATE".to_string(),
15363 vec![arg],
15364 ))))
15365 }
15366 _ => {
15367 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
15368 Ok(Expression::Function(Box::new(Function::new(
15369 "TO_DATE".to_string(),
15370 vec![arg],
15371 ))))
15372 }
15373 }
15374 }
15375 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
15376 "TO_DATE"
15377 if f.args.len() == 1
15378 && matches!(
15379 source,
15380 DialectType::Spark
15381 | DialectType::Databricks
15382 | DialectType::Hive
15383 ) =>
15384 {
15385 let arg = f.args.into_iter().next().unwrap();
15386 match target {
15387 DialectType::DuckDB => {
15388 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
15389 Ok(Expression::TryCast(Box::new(Cast {
15390 this: arg,
15391 to: DataType::Date,
15392 double_colon_syntax: false,
15393 trailing_comments: vec![],
15394 format: None,
15395 default: None,
15396 })))
15397 }
15398 DialectType::Presto
15399 | DialectType::Trino
15400 | DialectType::Athena => {
15401 // CAST(CAST(x AS TIMESTAMP) AS DATE)
15402 Ok(Self::double_cast_timestamp_date(arg))
15403 }
15404 DialectType::Snowflake => {
15405 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
15406 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
15407 Ok(Expression::Function(Box::new(Function::new(
15408 "TRY_TO_DATE".to_string(),
15409 vec![arg, Expression::string("yyyy-mm-DD")],
15410 ))))
15411 }
15412 _ => {
15413 // Default: keep as TO_DATE(x)
15414 Ok(Expression::Function(Box::new(Function::new(
15415 "TO_DATE".to_string(),
15416 vec![arg],
15417 ))))
15418 }
15419 }
15420 }
15421 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15422 "TO_DATE"
15423 if f.args.len() == 2
15424 && matches!(
15425 source,
15426 DialectType::Spark
15427 | DialectType::Databricks
15428 | DialectType::Hive
15429 ) =>
15430 {
15431 let mut args = f.args;
15432 let val = args.remove(0);
15433 let fmt_expr = args.remove(0);
15434 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15435
15436 if is_default_format {
15437 // Default format: same as 1-arg form
15438 match target {
15439 DialectType::DuckDB => {
15440 Ok(Expression::TryCast(Box::new(Cast {
15441 this: val,
15442 to: DataType::Date,
15443 double_colon_syntax: false,
15444 trailing_comments: vec![],
15445 format: None,
15446 default: None,
15447 })))
15448 }
15449 DialectType::Presto
15450 | DialectType::Trino
15451 | DialectType::Athena => {
15452 Ok(Self::double_cast_timestamp_date(val))
15453 }
15454 DialectType::Snowflake => {
15455 // TRY_TO_DATE(x, format) with Snowflake format mapping
15456 let sf_fmt = "yyyy-MM-dd"
15457 .replace("yyyy", "yyyy")
15458 .replace("MM", "mm")
15459 .replace("dd", "DD");
15460 Ok(Expression::Function(Box::new(Function::new(
15461 "TRY_TO_DATE".to_string(),
15462 vec![val, Expression::string(&sf_fmt)],
15463 ))))
15464 }
15465 _ => Ok(Expression::Function(Box::new(Function::new(
15466 "TO_DATE".to_string(),
15467 vec![val],
15468 )))),
15469 }
15470 } else {
15471 // Non-default format: use format-based parsing
15472 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15473 match target {
15474 DialectType::DuckDB => {
15475 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Convert a Java/Spark date format string into a C strptime
    // format (for TRY_STRPTIME on DuckDB).
    //
    // First pass: substitute multi-character Java tokens with "%x"
    // specifiers, longest tokens first so "yyyy" can never be split
    // into two "yy" matches.
    // NOTE(review): "EEEE" -> "%W" mirrors the sibling helper; in C
    // strftime "%A" is the weekday name while "%W" is a week number —
    // verify against the DuckDB generator.
    let substitutions = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let mut stage = fmt.to_string();
    for (java_token, c_token) in substitutions {
        stage = stage.replace(java_token, c_token);
    }
    // Second pass: handle the single-letter timezone tokens. "%x"
    // pairs produced above are copied through untouched so their
    // letters cannot be re-interpreted as tokens.
    let mut out = String::new();
    let mut stream = stage.chars().peekable();
    while let Some(ch) = stream.next() {
        if ch == '%' && stream.peek().is_some() {
            out.push('%');
            out.push(stream.next().unwrap());
        } else if ch == 'z' {
            out.push_str("%Z");
        } else if ch == 'Z' {
            out.push_str("%z");
        } else {
            out.push(ch);
        }
    }
    out
}
15508 let c_fmt = java_to_c_fmt_todate(s);
15509 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
15510 let try_strptime =
15511 Expression::Function(Box::new(Function::new(
15512 "TRY_STRPTIME".to_string(),
15513 vec![val, Expression::string(&c_fmt)],
15514 )));
15515 let cast_ts = Expression::Cast(Box::new(Cast {
15516 this: try_strptime,
15517 to: DataType::Timestamp {
15518 precision: None,
15519 timezone: false,
15520 },
15521 double_colon_syntax: false,
15522 trailing_comments: vec![],
15523 format: None,
15524 default: None,
15525 }));
15526 Ok(Expression::Cast(Box::new(Cast {
15527 this: cast_ts,
15528 to: DataType::Date,
15529 double_colon_syntax: false,
15530 trailing_comments: vec![],
15531 format: None,
15532 default: None,
15533 })))
15534 }
15535 DialectType::Presto
15536 | DialectType::Trino
15537 | DialectType::Athena => {
15538 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
15539 let p_fmt = s
15540 .replace("yyyy", "%Y")
15541 .replace("SSSSSS", "%f")
15542 .replace("MM", "%m")
15543 .replace("dd", "%d")
15544 .replace("HH", "%H")
15545 .replace("mm", "%M")
15546 .replace("ss", "%S")
15547 .replace("yy", "%y");
15548 let date_parse =
15549 Expression::Function(Box::new(Function::new(
15550 "DATE_PARSE".to_string(),
15551 vec![val, Expression::string(&p_fmt)],
15552 )));
15553 Ok(Expression::Cast(Box::new(Cast {
15554 this: date_parse,
15555 to: DataType::Date,
15556 double_colon_syntax: false,
15557 trailing_comments: vec![],
15558 format: None,
15559 default: None,
15560 })))
15561 }
15562 DialectType::Snowflake => {
15563 // TRY_TO_DATE(x, snowflake_fmt)
15564 Ok(Expression::Function(Box::new(Function::new(
15565 "TRY_TO_DATE".to_string(),
15566 vec![val, Expression::string(s)],
15567 ))))
15568 }
15569 _ => Ok(Expression::Function(Box::new(Function::new(
15570 "TO_DATE".to_string(),
15571 vec![val, fmt_expr],
15572 )))),
15573 }
15574 } else {
15575 Ok(Expression::Function(Box::new(Function::new(
15576 "TO_DATE".to_string(),
15577 vec![val, fmt_expr],
15578 ))))
15579 }
15580 }
15581 }
15582 // TO_TIMESTAMP(x) 1-arg: epoch conversion
15583 "TO_TIMESTAMP"
15584 if f.args.len() == 1
15585 && matches!(source, DialectType::DuckDB)
15586 && matches!(
15587 target,
15588 DialectType::BigQuery
15589 | DialectType::Presto
15590 | DialectType::Trino
15591 | DialectType::Hive
15592 | DialectType::Spark
15593 | DialectType::Databricks
15594 | DialectType::Athena
15595 ) =>
15596 {
15597 let arg = f.args.into_iter().next().unwrap();
15598 let func_name = match target {
15599 DialectType::BigQuery => "TIMESTAMP_SECONDS",
15600 DialectType::Presto
15601 | DialectType::Trino
15602 | DialectType::Athena
15603 | DialectType::Hive
15604 | DialectType::Spark
15605 | DialectType::Databricks => "FROM_UNIXTIME",
15606 _ => "TO_TIMESTAMP",
15607 };
15608 Ok(Expression::Function(Box::new(Function::new(
15609 func_name.to_string(),
15610 vec![arg],
15611 ))))
15612 }
15613 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
15614 "CONCAT" if f.args.len() == 1 => {
15615 let arg = f.args.into_iter().next().unwrap();
15616 match target {
15617 DialectType::Presto
15618 | DialectType::Trino
15619 | DialectType::Athena => {
15620 // CONCAT(a) -> CAST(a AS VARCHAR)
15621 Ok(Expression::Cast(Box::new(Cast {
15622 this: arg,
15623 to: DataType::VarChar {
15624 length: None,
15625 parenthesized_length: false,
15626 },
15627 trailing_comments: vec![],
15628 double_colon_syntax: false,
15629 format: None,
15630 default: None,
15631 })))
15632 }
15633 DialectType::TSQL => {
15634 // CONCAT(a) -> a
15635 Ok(arg)
15636 }
15637 DialectType::DuckDB => {
15638 // Keep CONCAT(a) for DuckDB (native support)
15639 Ok(Expression::Function(Box::new(Function::new(
15640 "CONCAT".to_string(),
15641 vec![arg],
15642 ))))
15643 }
15644 DialectType::Spark | DialectType::Databricks => {
15645 let coalesced = Expression::Coalesce(Box::new(
15646 crate::expressions::VarArgFunc {
15647 expressions: vec![arg, Expression::string("")],
15648 original_name: None,
15649 },
15650 ));
15651 Ok(Expression::Function(Box::new(Function::new(
15652 "CONCAT".to_string(),
15653 vec![coalesced],
15654 ))))
15655 }
15656 _ => Ok(Expression::Function(Box::new(Function::new(
15657 "CONCAT".to_string(),
15658 vec![arg],
15659 )))),
15660 }
15661 }
15662 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
15663 "REGEXP_EXTRACT"
15664 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
15665 {
15666 // If group_index is 0, drop it
15667 let drop_group = match &f.args[2] {
15668 Expression::Literal(Literal::Number(n)) => n == "0",
15669 _ => false,
15670 };
15671 if drop_group {
15672 let mut args = f.args;
15673 args.truncate(2);
15674 Ok(Expression::Function(Box::new(Function::new(
15675 "REGEXP_EXTRACT".to_string(),
15676 args,
15677 ))))
15678 } else {
15679 Ok(Expression::Function(f))
15680 }
15681 }
15682 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
15683 "REGEXP_EXTRACT"
15684 if f.args.len() == 4
15685 && matches!(target, DialectType::Snowflake) =>
15686 {
15687 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
15688 let mut args = f.args;
15689 let this = args.remove(0);
15690 let pattern = args.remove(0);
15691 let group = args.remove(0);
15692 let flags = args.remove(0);
15693 Ok(Expression::Function(Box::new(Function::new(
15694 "REGEXP_SUBSTR".to_string(),
15695 vec![
15696 this,
15697 pattern,
15698 Expression::number(1),
15699 Expression::number(1),
15700 flags,
15701 group,
15702 ],
15703 ))))
15704 }
15705 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
15706 "REGEXP_SUBSTR"
15707 if f.args.len() == 3
15708 && matches!(
15709 target,
15710 DialectType::DuckDB
15711 | DialectType::Presto
15712 | DialectType::Trino
15713 | DialectType::Spark
15714 | DialectType::Databricks
15715 ) =>
15716 {
15717 let mut args = f.args;
15718 let this = args.remove(0);
15719 let pattern = args.remove(0);
15720 let position = args.remove(0);
15721 // Wrap subject in SUBSTRING(this, position) to apply the offset
15722 let substring_expr = Expression::Function(Box::new(Function::new(
15723 "SUBSTRING".to_string(),
15724 vec![this, position],
15725 )));
15726 let target_name = match target {
15727 DialectType::DuckDB => "REGEXP_EXTRACT",
15728 _ => "REGEXP_EXTRACT",
15729 };
15730 Ok(Expression::Function(Box::new(Function::new(
15731 target_name.to_string(),
15732 vec![substring_expr, pattern],
15733 ))))
15734 }
15735 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
15736 "TO_DAYS" if f.args.len() == 1 => {
15737 let x = f.args.into_iter().next().unwrap();
15738 let epoch = Expression::string("0000-01-01");
15739 // Build the final target-specific expression directly
15740 let datediff_expr = match target {
15741 DialectType::MySQL | DialectType::SingleStore => {
15742 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
15743 Expression::Function(Box::new(Function::new(
15744 "DATEDIFF".to_string(),
15745 vec![x, epoch],
15746 )))
15747 }
15748 DialectType::DuckDB => {
15749 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
15750 let cast_epoch = Expression::Cast(Box::new(Cast {
15751 this: epoch,
15752 to: DataType::Date,
15753 trailing_comments: Vec::new(),
15754 double_colon_syntax: false,
15755 format: None,
15756 default: None,
15757 }));
15758 let cast_x = Expression::Cast(Box::new(Cast {
15759 this: x,
15760 to: DataType::Date,
15761 trailing_comments: Vec::new(),
15762 double_colon_syntax: false,
15763 format: None,
15764 default: None,
15765 }));
15766 Expression::Function(Box::new(Function::new(
15767 "DATE_DIFF".to_string(),
15768 vec![Expression::string("DAY"), cast_epoch, cast_x],
15769 )))
15770 }
15771 DialectType::Presto
15772 | DialectType::Trino
15773 | DialectType::Athena => {
15774 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
15775 let cast_epoch = Self::double_cast_timestamp_date(epoch);
15776 let cast_x = Self::double_cast_timestamp_date(x);
15777 Expression::Function(Box::new(Function::new(
15778 "DATE_DIFF".to_string(),
15779 vec![Expression::string("DAY"), cast_epoch, cast_x],
15780 )))
15781 }
15782 _ => {
15783 // Default: (DATEDIFF(x, '0000-01-01') + 1)
15784 Expression::Function(Box::new(Function::new(
15785 "DATEDIFF".to_string(),
15786 vec![x, epoch],
15787 )))
15788 }
15789 };
15790 let add_one = Expression::Add(Box::new(BinaryOp::new(
15791 datediff_expr,
15792 Expression::number(1),
15793 )));
15794 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15795 this: add_one,
15796 trailing_comments: Vec::new(),
15797 })))
15798 }
15799 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
15800 "STR_TO_DATE"
15801 if f.args.len() == 2
15802 && matches!(
15803 target,
15804 DialectType::Presto | DialectType::Trino
15805 ) =>
15806 {
15807 let mut args = f.args;
15808 let x = args.remove(0);
15809 let format_expr = args.remove(0);
15810 // Check if the format contains time components
15811 let has_time =
15812 if let Expression::Literal(Literal::String(ref fmt)) =
15813 format_expr
15814 {
15815 fmt.contains("%H")
15816 || fmt.contains("%T")
15817 || fmt.contains("%M")
15818 || fmt.contains("%S")
15819 || fmt.contains("%I")
15820 || fmt.contains("%p")
15821 } else {
15822 false
15823 };
15824 let date_parse = Expression::Function(Box::new(Function::new(
15825 "DATE_PARSE".to_string(),
15826 vec![x, format_expr],
15827 )));
15828 if has_time {
15829 // Has time components: just DATE_PARSE
15830 Ok(date_parse)
15831 } else {
15832 // Date-only: CAST(DATE_PARSE(...) AS DATE)
15833 Ok(Expression::Cast(Box::new(Cast {
15834 this: date_parse,
15835 to: DataType::Date,
15836 trailing_comments: Vec::new(),
15837 double_colon_syntax: false,
15838 format: None,
15839 default: None,
15840 })))
15841 }
15842 }
15843 "STR_TO_DATE"
15844 if f.args.len() == 2
15845 && matches!(
15846 target,
15847 DialectType::PostgreSQL | DialectType::Redshift
15848 ) =>
15849 {
15850 let mut args = f.args;
15851 let x = args.remove(0);
15852 let fmt = args.remove(0);
15853 let pg_fmt = match fmt {
15854 Expression::Literal(Literal::String(s)) => Expression::string(
15855 &s.replace("%Y", "YYYY")
15856 .replace("%m", "MM")
15857 .replace("%d", "DD")
15858 .replace("%H", "HH24")
15859 .replace("%M", "MI")
15860 .replace("%S", "SS"),
15861 ),
15862 other => other,
15863 };
15864 let to_date = Expression::Function(Box::new(Function::new(
15865 "TO_DATE".to_string(),
15866 vec![x, pg_fmt],
15867 )));
15868 Ok(Expression::Cast(Box::new(Cast {
15869 this: to_date,
15870 to: DataType::Timestamp {
15871 timezone: false,
15872 precision: None,
15873 },
15874 trailing_comments: Vec::new(),
15875 double_colon_syntax: false,
15876 format: None,
15877 default: None,
15878 })))
15879 }
15880 // RANGE(start, end) -> GENERATE_SERIES for SQLite
15881 "RANGE"
15882 if (f.args.len() == 1 || f.args.len() == 2)
15883 && matches!(target, DialectType::SQLite) =>
15884 {
15885 if f.args.len() == 2 {
15886 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
15887 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
15888 let mut args = f.args;
15889 let start = args.remove(0);
15890 let end = args.remove(0);
15891 Ok(Expression::Function(Box::new(Function::new(
15892 "GENERATE_SERIES".to_string(),
15893 vec![start, end],
15894 ))))
15895 } else {
15896 Ok(Expression::Function(f))
15897 }
15898 }
15899 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
15900 // When source is Snowflake, keep as-is (args already in correct form)
15901 "UNIFORM"
15902 if matches!(target, DialectType::Snowflake)
15903 && (f.args.len() == 2 || f.args.len() == 3) =>
15904 {
15905 if matches!(source, DialectType::Snowflake) {
15906 // Snowflake -> Snowflake: keep as-is
15907 Ok(Expression::Function(f))
15908 } else {
15909 let mut args = f.args;
15910 let low = args.remove(0);
15911 let high = args.remove(0);
15912 let random = if !args.is_empty() {
15913 let seed = args.remove(0);
15914 Expression::Function(Box::new(Function::new(
15915 "RANDOM".to_string(),
15916 vec![seed],
15917 )))
15918 } else {
15919 Expression::Function(Box::new(Function::new(
15920 "RANDOM".to_string(),
15921 vec![],
15922 )))
15923 };
15924 Ok(Expression::Function(Box::new(Function::new(
15925 "UNIFORM".to_string(),
15926 vec![low, high, random],
15927 ))))
15928 }
15929 }
15930 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
15931 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
15932 let mut args = f.args;
15933 let ts_arg = args.remove(0);
15934 let tz_arg = args.remove(0);
15935 // Cast string literal to TIMESTAMP for all targets
15936 let ts_cast =
15937 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
15938 Expression::Cast(Box::new(Cast {
15939 this: ts_arg,
15940 to: DataType::Timestamp {
15941 timezone: false,
15942 precision: None,
15943 },
15944 trailing_comments: vec![],
15945 double_colon_syntax: false,
15946 format: None,
15947 default: None,
15948 }))
15949 } else {
15950 ts_arg
15951 };
15952 match target {
15953 DialectType::Spark | DialectType::Databricks => {
15954 Ok(Expression::Function(Box::new(Function::new(
15955 "TO_UTC_TIMESTAMP".to_string(),
15956 vec![ts_cast, tz_arg],
15957 ))))
15958 }
15959 DialectType::Snowflake => {
15960 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
15961 Ok(Expression::Function(Box::new(Function::new(
15962 "CONVERT_TIMEZONE".to_string(),
15963 vec![tz_arg, Expression::string("UTC"), ts_cast],
15964 ))))
15965 }
15966 DialectType::Presto
15967 | DialectType::Trino
15968 | DialectType::Athena => {
15969 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
15970 let wtz = Expression::Function(Box::new(Function::new(
15971 "WITH_TIMEZONE".to_string(),
15972 vec![ts_cast, tz_arg],
15973 )));
15974 Ok(Expression::AtTimeZone(Box::new(
15975 crate::expressions::AtTimeZone {
15976 this: wtz,
15977 zone: Expression::string("UTC"),
15978 },
15979 )))
15980 }
15981 DialectType::BigQuery => {
15982 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
15983 let cast_dt = Expression::Cast(Box::new(Cast {
15984 this: if let Expression::Cast(c) = ts_cast {
15985 c.this
15986 } else {
15987 ts_cast.clone()
15988 },
15989 to: DataType::Custom {
15990 name: "DATETIME".to_string(),
15991 },
15992 trailing_comments: vec![],
15993 double_colon_syntax: false,
15994 format: None,
15995 default: None,
15996 }));
15997 let ts_func =
15998 Expression::Function(Box::new(Function::new(
15999 "TIMESTAMP".to_string(),
16000 vec![cast_dt, tz_arg],
16001 )));
16002 Ok(Expression::Function(Box::new(Function::new(
16003 "DATETIME".to_string(),
16004 vec![ts_func, Expression::string("UTC")],
16005 ))))
16006 }
16007 _ => {
16008 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
16009 let atz1 = Expression::AtTimeZone(Box::new(
16010 crate::expressions::AtTimeZone {
16011 this: ts_cast,
16012 zone: tz_arg,
16013 },
16014 ));
16015 Ok(Expression::AtTimeZone(Box::new(
16016 crate::expressions::AtTimeZone {
16017 this: atz1,
16018 zone: Expression::string("UTC"),
16019 },
16020 )))
16021 }
16022 }
16023 }
16024 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16025 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
16026 let mut args = f.args;
16027 let ts_arg = args.remove(0);
16028 let tz_arg = args.remove(0);
16029 // Cast string literal to TIMESTAMP
16030 let ts_cast =
16031 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16032 Expression::Cast(Box::new(Cast {
16033 this: ts_arg,
16034 to: DataType::Timestamp {
16035 timezone: false,
16036 precision: None,
16037 },
16038 trailing_comments: vec![],
16039 double_colon_syntax: false,
16040 format: None,
16041 default: None,
16042 }))
16043 } else {
16044 ts_arg
16045 };
16046 match target {
16047 DialectType::Spark | DialectType::Databricks => {
16048 Ok(Expression::Function(Box::new(Function::new(
16049 "FROM_UTC_TIMESTAMP".to_string(),
16050 vec![ts_cast, tz_arg],
16051 ))))
16052 }
16053 DialectType::Presto
16054 | DialectType::Trino
16055 | DialectType::Athena => {
16056 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
16057 Ok(Expression::Function(Box::new(Function::new(
16058 "AT_TIMEZONE".to_string(),
16059 vec![ts_cast, tz_arg],
16060 ))))
16061 }
16062 DialectType::Snowflake => {
16063 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
16064 Ok(Expression::Function(Box::new(Function::new(
16065 "CONVERT_TIMEZONE".to_string(),
16066 vec![Expression::string("UTC"), tz_arg, ts_cast],
16067 ))))
16068 }
16069 _ => {
16070 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
16071 Ok(Expression::AtTimeZone(Box::new(
16072 crate::expressions::AtTimeZone {
16073 this: ts_cast,
16074 zone: tz_arg,
16075 },
16076 )))
16077 }
16078 }
16079 }
16080 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16081 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
16082 let name = match target {
16083 DialectType::Snowflake => "OBJECT_CONSTRUCT",
16084 _ => "MAP",
16085 };
16086 Ok(Expression::Function(Box::new(Function::new(
16087 name.to_string(),
16088 f.args,
16089 ))))
16090 }
16091 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
16092 "STR_TO_MAP" if f.args.len() >= 1 => match target {
16093 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16094 Ok(Expression::Function(Box::new(Function::new(
16095 "SPLIT_TO_MAP".to_string(),
16096 f.args,
16097 ))))
16098 }
16099 _ => Ok(Expression::Function(f)),
16100 },
16101 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
16102 "TIME_TO_STR" if f.args.len() == 2 => {
16103 let mut args = f.args;
16104 let this = args.remove(0);
16105 let fmt_expr = args.remove(0);
16106 let format =
16107 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16108 s
16109 } else {
16110 "%Y-%m-%d %H:%M:%S".to_string()
16111 };
16112 Ok(Expression::TimeToStr(Box::new(
16113 crate::expressions::TimeToStr {
16114 this: Box::new(this),
16115 format,
16116 culture: None,
16117 zone: None,
16118 },
16119 )))
16120 }
16121 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
16122 "STR_TO_TIME" if f.args.len() == 2 => {
16123 let mut args = f.args;
16124 let this = args.remove(0);
16125 let fmt_expr = args.remove(0);
16126 let format =
16127 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16128 s
16129 } else {
16130 "%Y-%m-%d %H:%M:%S".to_string()
16131 };
16132 Ok(Expression::StrToTime(Box::new(
16133 crate::expressions::StrToTime {
16134 this: Box::new(this),
16135 format,
16136 zone: None,
16137 safe: None,
16138 target_type: None,
16139 },
16140 )))
16141 }
16142 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
16143 "STR_TO_UNIX" if f.args.len() >= 1 => {
16144 let mut args = f.args;
16145 let this = args.remove(0);
16146 let format = if !args.is_empty() {
16147 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16148 {
16149 Some(s)
16150 } else {
16151 None
16152 }
16153 } else {
16154 None
16155 };
16156 Ok(Expression::StrToUnix(Box::new(
16157 crate::expressions::StrToUnix {
16158 this: Some(Box::new(this)),
16159 format,
16160 },
16161 )))
16162 }
16163 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
16164 "TIME_TO_UNIX" if f.args.len() == 1 => {
16165 let mut args = f.args;
16166 let this = args.remove(0);
16167 Ok(Expression::TimeToUnix(Box::new(
16168 crate::expressions::UnaryFunc {
16169 this,
16170 original_name: None,
16171 },
16172 )))
16173 }
16174 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
16175 "UNIX_TO_STR" if f.args.len() >= 1 => {
16176 let mut args = f.args;
16177 let this = args.remove(0);
16178 let format = if !args.is_empty() {
16179 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16180 {
16181 Some(s)
16182 } else {
16183 None
16184 }
16185 } else {
16186 None
16187 };
16188 Ok(Expression::UnixToStr(Box::new(
16189 crate::expressions::UnixToStr {
16190 this: Box::new(this),
16191 format,
16192 },
16193 )))
16194 }
16195 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
16196 "UNIX_TO_TIME" if f.args.len() == 1 => {
16197 let mut args = f.args;
16198 let this = args.remove(0);
16199 Ok(Expression::UnixToTime(Box::new(
16200 crate::expressions::UnixToTime {
16201 this: Box::new(this),
16202 scale: None,
16203 zone: None,
16204 hours: None,
16205 minutes: None,
16206 format: None,
16207 target_type: None,
16208 },
16209 )))
16210 }
16211 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
16212 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
16213 let mut args = f.args;
16214 let this = args.remove(0);
16215 Ok(Expression::TimeStrToDate(Box::new(
16216 crate::expressions::UnaryFunc {
16217 this,
16218 original_name: None,
16219 },
16220 )))
16221 }
16222 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
16223 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
16224 let mut args = f.args;
16225 let this = args.remove(0);
16226 Ok(Expression::TimeStrToTime(Box::new(
16227 crate::expressions::TimeStrToTime {
16228 this: Box::new(this),
16229 zone: None,
16230 },
16231 )))
16232 }
16233 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
16234 "MONTHS_BETWEEN" if f.args.len() == 2 => {
16235 match target {
16236 DialectType::DuckDB => {
16237 let mut args = f.args;
16238 let end_date = args.remove(0);
16239 let start_date = args.remove(0);
16240 let cast_end = Self::ensure_cast_date(end_date);
16241 let cast_start = Self::ensure_cast_date(start_date);
16242 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
16243 let dd = Expression::Function(Box::new(Function::new(
16244 "DATE_DIFF".to_string(),
16245 vec![
16246 Expression::string("MONTH"),
16247 cast_start.clone(),
16248 cast_end.clone(),
16249 ],
16250 )));
16251 let day_end =
16252 Expression::Function(Box::new(Function::new(
16253 "DAY".to_string(),
16254 vec![cast_end.clone()],
16255 )));
16256 let day_start =
16257 Expression::Function(Box::new(Function::new(
16258 "DAY".to_string(),
16259 vec![cast_start.clone()],
16260 )));
16261 let last_day_end =
16262 Expression::Function(Box::new(Function::new(
16263 "LAST_DAY".to_string(),
16264 vec![cast_end.clone()],
16265 )));
16266 let last_day_start =
16267 Expression::Function(Box::new(Function::new(
16268 "LAST_DAY".to_string(),
16269 vec![cast_start.clone()],
16270 )));
16271 let day_last_end = Expression::Function(Box::new(
16272 Function::new("DAY".to_string(), vec![last_day_end]),
16273 ));
16274 let day_last_start = Expression::Function(Box::new(
16275 Function::new("DAY".to_string(), vec![last_day_start]),
16276 ));
16277 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
16278 day_end.clone(),
16279 day_last_end,
16280 )));
16281 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
16282 day_start.clone(),
16283 day_last_start,
16284 )));
16285 let both_cond =
16286 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
16287 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
16288 day_end, day_start,
16289 )));
16290 let day_diff_paren = Expression::Paren(Box::new(
16291 crate::expressions::Paren {
16292 this: day_diff,
16293 trailing_comments: Vec::new(),
16294 },
16295 ));
16296 let frac = Expression::Div(Box::new(BinaryOp::new(
16297 day_diff_paren,
16298 Expression::Literal(Literal::Number(
16299 "31.0".to_string(),
16300 )),
16301 )));
16302 let case_expr = Expression::Case(Box::new(Case {
16303 operand: None,
16304 whens: vec![(both_cond, Expression::number(0))],
16305 else_: Some(frac),
16306 comments: Vec::new(),
16307 }));
16308 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
16309 }
16310 DialectType::Snowflake | DialectType::Redshift => {
16311 let mut args = f.args;
16312 let end_date = args.remove(0);
16313 let start_date = args.remove(0);
16314 let unit = Expression::Identifier(Identifier::new("MONTH"));
16315 Ok(Expression::Function(Box::new(Function::new(
16316 "DATEDIFF".to_string(),
16317 vec![unit, start_date, end_date],
16318 ))))
16319 }
16320 DialectType::Presto
16321 | DialectType::Trino
16322 | DialectType::Athena => {
16323 let mut args = f.args;
16324 let end_date = args.remove(0);
16325 let start_date = args.remove(0);
16326 Ok(Expression::Function(Box::new(Function::new(
16327 "DATE_DIFF".to_string(),
16328 vec![Expression::string("MONTH"), start_date, end_date],
16329 ))))
16330 }
16331 _ => Ok(Expression::Function(f)),
16332 }
16333 }
16334 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
16335 // Drop the roundOff arg for non-Spark targets, keep it for Spark
16336 "MONTHS_BETWEEN" if f.args.len() == 3 => {
16337 match target {
16338 DialectType::Spark | DialectType::Databricks => {
16339 Ok(Expression::Function(f))
16340 }
16341 _ => {
16342 // Drop the 3rd arg and delegate to the 2-arg logic
16343 let mut args = f.args;
16344 let end_date = args.remove(0);
16345 let start_date = args.remove(0);
16346 // Re-create as 2-arg and process
16347 let f2 = Function::new(
16348 "MONTHS_BETWEEN".to_string(),
16349 vec![end_date, start_date],
16350 );
16351 let e2 = Expression::Function(Box::new(f2));
16352 Self::cross_dialect_normalize(e2, source, target)
16353 }
16354 }
16355 }
16356 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
16357 "TO_TIMESTAMP"
16358 if f.args.len() == 1
16359 && matches!(
16360 source,
16361 DialectType::Spark
16362 | DialectType::Databricks
16363 | DialectType::Hive
16364 ) =>
16365 {
16366 let arg = f.args.into_iter().next().unwrap();
16367 Ok(Expression::Cast(Box::new(Cast {
16368 this: arg,
16369 to: DataType::Timestamp {
16370 timezone: false,
16371 precision: None,
16372 },
16373 trailing_comments: vec![],
16374 double_colon_syntax: false,
16375 format: None,
16376 default: None,
16377 })))
16378 }
16379 // STRING(x) -> CAST(x AS STRING) for Spark target
16380 "STRING"
16381 if f.args.len() == 1
16382 && matches!(
16383 source,
16384 DialectType::Spark | DialectType::Databricks
16385 ) =>
16386 {
16387 let arg = f.args.into_iter().next().unwrap();
16388 let dt = match target {
16389 DialectType::Spark
16390 | DialectType::Databricks
16391 | DialectType::Hive => DataType::Custom {
16392 name: "STRING".to_string(),
16393 },
16394 _ => DataType::Text,
16395 };
16396 Ok(Expression::Cast(Box::new(Cast {
16397 this: arg,
16398 to: dt,
16399 trailing_comments: vec![],
16400 double_colon_syntax: false,
16401 format: None,
16402 default: None,
16403 })))
16404 }
16405 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
16406 "LOGICAL_OR" if f.args.len() == 1 => {
16407 let name = match target {
16408 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16409 _ => "LOGICAL_OR",
16410 };
16411 Ok(Expression::Function(Box::new(Function::new(
16412 name.to_string(),
16413 f.args,
16414 ))))
16415 }
16416 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16417 "SPLIT"
16418 if f.args.len() == 2
16419 && matches!(
16420 source,
16421 DialectType::Spark
16422 | DialectType::Databricks
16423 | DialectType::Hive
16424 ) =>
16425 {
16426 let name = match target {
16427 DialectType::DuckDB => "STR_SPLIT_REGEX",
16428 DialectType::Presto
16429 | DialectType::Trino
16430 | DialectType::Athena => "REGEXP_SPLIT",
16431 DialectType::Spark
16432 | DialectType::Databricks
16433 | DialectType::Hive => "SPLIT",
16434 _ => "SPLIT",
16435 };
16436 Ok(Expression::Function(Box::new(Function::new(
16437 name.to_string(),
16438 f.args,
16439 ))))
16440 }
16441 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
16442 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
16443 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16444 Ok(Expression::Function(Box::new(Function::new(
16445 "ELEMENT_AT".to_string(),
16446 f.args,
16447 ))))
16448 }
16449 DialectType::DuckDB => {
16450 let mut args = f.args;
16451 let arr = args.remove(0);
16452 let idx = args.remove(0);
16453 Ok(Expression::Subscript(Box::new(
16454 crate::expressions::Subscript {
16455 this: arr,
16456 index: idx,
16457 },
16458 )))
16459 }
16460 _ => Ok(Expression::Function(f)),
16461 },
16462 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
16463 "ARRAY_FILTER" if f.args.len() == 2 => {
16464 let name = match target {
16465 DialectType::DuckDB => "LIST_FILTER",
16466 DialectType::StarRocks => "ARRAY_FILTER",
16467 _ => "FILTER",
16468 };
16469 Ok(Expression::Function(Box::new(Function::new(
16470 name.to_string(),
16471 f.args,
16472 ))))
16473 }
16474 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
16475 "FILTER" if f.args.len() == 2 => {
16476 let name = match target {
16477 DialectType::DuckDB => "LIST_FILTER",
16478 DialectType::StarRocks => "ARRAY_FILTER",
16479 _ => "FILTER",
16480 };
16481 Ok(Expression::Function(Box::new(Function::new(
16482 name.to_string(),
16483 f.args,
16484 ))))
16485 }
16486 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
16487 "REDUCE" if f.args.len() >= 3 => {
16488 let name = match target {
16489 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
16490 _ => "REDUCE",
16491 };
16492 Ok(Expression::Function(Box::new(Function::new(
16493 name.to_string(),
16494 f.args,
16495 ))))
16496 }
16497 // CURRENT_SCHEMA() -> dialect-specific
16498 "CURRENT_SCHEMA" => {
16499 match target {
16500 DialectType::PostgreSQL => {
16501 // PostgreSQL: CURRENT_SCHEMA (no parens)
16502 Ok(Expression::Function(Box::new(Function {
16503 name: "CURRENT_SCHEMA".to_string(),
16504 args: vec![],
16505 distinct: false,
16506 trailing_comments: vec![],
16507 use_bracket_syntax: false,
16508 no_parens: true,
16509 quoted: false,
16510 })))
16511 }
16512 DialectType::MySQL
16513 | DialectType::Doris
16514 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
16515 Function::new("SCHEMA".to_string(), vec![]),
16516 ))),
16517 DialectType::TSQL => Ok(Expression::Function(Box::new(
16518 Function::new("SCHEMA_NAME".to_string(), vec![]),
16519 ))),
16520 DialectType::SQLite => {
16521 Ok(Expression::Literal(Literal::String("main".to_string())))
16522 }
16523 _ => Ok(Expression::Function(f)),
16524 }
16525 }
16526 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
16527 "LTRIM" if f.args.len() == 2 => match target {
16528 DialectType::Spark
16529 | DialectType::Hive
16530 | DialectType::Databricks
16531 | DialectType::ClickHouse => {
16532 let mut args = f.args;
16533 let str_expr = args.remove(0);
16534 let chars = args.remove(0);
16535 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
16536 this: str_expr,
16537 characters: Some(chars),
16538 position: crate::expressions::TrimPosition::Leading,
16539 sql_standard_syntax: true,
16540 position_explicit: true,
16541 })))
16542 }
16543 _ => Ok(Expression::Function(f)),
16544 },
16545 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
16546 "RTRIM" if f.args.len() == 2 => match target {
16547 DialectType::Spark
16548 | DialectType::Hive
16549 | DialectType::Databricks
16550 | DialectType::ClickHouse => {
16551 let mut args = f.args;
16552 let str_expr = args.remove(0);
16553 let chars = args.remove(0);
16554 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
16555 this: str_expr,
16556 characters: Some(chars),
16557 position: crate::expressions::TrimPosition::Trailing,
16558 sql_standard_syntax: true,
16559 position_explicit: true,
16560 })))
16561 }
16562 _ => Ok(Expression::Function(f)),
16563 },
16564 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
16565 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
16566 DialectType::ClickHouse => {
16567 let mut new_f = *f;
16568 new_f.name = "arrayReverse".to_string();
16569 Ok(Expression::Function(Box::new(new_f)))
16570 }
16571 _ => Ok(Expression::Function(f)),
16572 },
16573 // UUID() -> NEWID() for TSQL
16574 "UUID" if f.args.is_empty() => match target {
16575 DialectType::TSQL | DialectType::Fabric => {
16576 Ok(Expression::Function(Box::new(Function::new(
16577 "NEWID".to_string(),
16578 vec![],
16579 ))))
16580 }
16581 _ => Ok(Expression::Function(f)),
16582 },
16583 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
16584 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
16585 DialectType::ClickHouse => {
16586 let mut new_f = *f;
16587 new_f.name = "farmFingerprint64".to_string();
16588 Ok(Expression::Function(Box::new(new_f)))
16589 }
16590 DialectType::Redshift => {
16591 let mut new_f = *f;
16592 new_f.name = "FARMFINGERPRINT64".to_string();
16593 Ok(Expression::Function(Box::new(new_f)))
16594 }
16595 _ => Ok(Expression::Function(f)),
16596 },
16597 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
16598 "JSON_KEYS" => match target {
16599 DialectType::Databricks | DialectType::Spark => {
16600 let mut new_f = *f;
16601 new_f.name = "JSON_OBJECT_KEYS".to_string();
16602 Ok(Expression::Function(Box::new(new_f)))
16603 }
16604 DialectType::Snowflake => {
16605 let mut new_f = *f;
16606 new_f.name = "OBJECT_KEYS".to_string();
16607 Ok(Expression::Function(Box::new(new_f)))
16608 }
16609 _ => Ok(Expression::Function(f)),
16610 },
16611 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
16612 "WEEKOFYEAR" => match target {
16613 DialectType::Snowflake => {
16614 let mut new_f = *f;
16615 new_f.name = "WEEKISO".to_string();
16616 Ok(Expression::Function(Box::new(new_f)))
16617 }
16618 _ => Ok(Expression::Function(f)),
16619 },
16620 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
16621 "FORMAT"
16622 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
16623 {
16624 match target {
16625 DialectType::Databricks | DialectType::Spark => {
16626 let mut new_f = *f;
16627 new_f.name = "FORMAT_STRING".to_string();
16628 Ok(Expression::Function(Box::new(new_f)))
16629 }
16630 _ => Ok(Expression::Function(f)),
16631 }
16632 }
16633 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
16634 "CONCAT_WS" if f.args.len() >= 2 => match target {
16635 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16636 let mut args = f.args;
16637 let sep = args.remove(0);
16638 let cast_args: Vec<Expression> = args
16639 .into_iter()
16640 .map(|a| {
16641 Expression::Cast(Box::new(Cast {
16642 this: a,
16643 to: DataType::VarChar {
16644 length: None,
16645 parenthesized_length: false,
16646 },
16647 double_colon_syntax: false,
16648 trailing_comments: Vec::new(),
16649 format: None,
16650 default: None,
16651 }))
16652 })
16653 .collect();
16654 let mut new_args = vec![sep];
16655 new_args.extend(cast_args);
16656 Ok(Expression::Function(Box::new(Function::new(
16657 "CONCAT_WS".to_string(),
16658 new_args,
16659 ))))
16660 }
16661 _ => Ok(Expression::Function(f)),
16662 },
16663 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
16664 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
16665 DialectType::Presto
16666 | DialectType::Trino
16667 | DialectType::Athena
16668 | DialectType::Databricks
16669 | DialectType::Spark => {
16670 let mut new_f = *f;
16671 new_f.name = "SLICE".to_string();
16672 Ok(Expression::Function(Box::new(new_f)))
16673 }
16674 DialectType::ClickHouse => {
16675 let mut new_f = *f;
16676 new_f.name = "arraySlice".to_string();
16677 Ok(Expression::Function(Box::new(new_f)))
16678 }
16679 _ => Ok(Expression::Function(f)),
16680 },
16681 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
16682 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
16683 DialectType::DuckDB => {
16684 let mut args = f.args;
16685 let arr = args.remove(0);
16686 let val = args.remove(0);
16687 Ok(Expression::Function(Box::new(Function::new(
16688 "LIST_PREPEND".to_string(),
16689 vec![val, arr],
16690 ))))
16691 }
16692 _ => Ok(Expression::Function(f)),
16693 },
16694 // ARRAY_REMOVE(arr, target) -> dialect-specific
16695 "ARRAY_REMOVE" if f.args.len() == 2 => {
16696 match target {
16697 DialectType::DuckDB => {
16698 let mut args = f.args;
16699 let arr = args.remove(0);
16700 let target_val = args.remove(0);
16701 let u_id = crate::expressions::Identifier::new("_u");
16702 // LIST_FILTER(arr, _u -> _u <> target)
16703 let lambda = Expression::Lambda(Box::new(
16704 crate::expressions::LambdaExpr {
16705 parameters: vec![u_id.clone()],
16706 body: Expression::Neq(Box::new(BinaryOp {
16707 left: Expression::Identifier(u_id),
16708 right: target_val,
16709 left_comments: Vec::new(),
16710 operator_comments: Vec::new(),
16711 trailing_comments: Vec::new(),
16712 })),
16713 colon: false,
16714 parameter_types: Vec::new(),
16715 },
16716 ));
16717 Ok(Expression::Function(Box::new(Function::new(
16718 "LIST_FILTER".to_string(),
16719 vec![arr, lambda],
16720 ))))
16721 }
16722 DialectType::ClickHouse => {
16723 let mut args = f.args;
16724 let arr = args.remove(0);
16725 let target_val = args.remove(0);
16726 let u_id = crate::expressions::Identifier::new("_u");
16727 // arrayFilter(_u -> _u <> target, arr)
16728 let lambda = Expression::Lambda(Box::new(
16729 crate::expressions::LambdaExpr {
16730 parameters: vec![u_id.clone()],
16731 body: Expression::Neq(Box::new(BinaryOp {
16732 left: Expression::Identifier(u_id),
16733 right: target_val,
16734 left_comments: Vec::new(),
16735 operator_comments: Vec::new(),
16736 trailing_comments: Vec::new(),
16737 })),
16738 colon: false,
16739 parameter_types: Vec::new(),
16740 },
16741 ));
16742 Ok(Expression::Function(Box::new(Function::new(
16743 "arrayFilter".to_string(),
16744 vec![lambda, arr],
16745 ))))
16746 }
16747 DialectType::BigQuery => {
16748 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
16749 let mut args = f.args;
16750 let arr = args.remove(0);
16751 let target_val = args.remove(0);
16752 let u_id = crate::expressions::Identifier::new("_u");
16753 let u_col =
16754 Expression::Column(crate::expressions::Column {
16755 name: u_id.clone(),
16756 table: None,
16757 join_mark: false,
16758 trailing_comments: Vec::new(),
16759 });
16760 // UNNEST(the_array) AS _u
16761 let unnest_expr = Expression::Unnest(Box::new(
16762 crate::expressions::UnnestFunc {
16763 this: arr,
16764 expressions: Vec::new(),
16765 with_ordinality: false,
16766 alias: None,
16767 offset_alias: None,
16768 },
16769 ));
16770 let aliased_unnest = Expression::Alias(Box::new(
16771 crate::expressions::Alias {
16772 this: unnest_expr,
16773 alias: u_id.clone(),
16774 column_aliases: Vec::new(),
16775 pre_alias_comments: Vec::new(),
16776 trailing_comments: Vec::new(),
16777 },
16778 ));
16779 // _u <> target
16780 let where_cond = Expression::Neq(Box::new(BinaryOp {
16781 left: u_col.clone(),
16782 right: target_val,
16783 left_comments: Vec::new(),
16784 operator_comments: Vec::new(),
16785 trailing_comments: Vec::new(),
16786 }));
16787 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
16788 let subquery = Expression::Select(Box::new(
16789 crate::expressions::Select::new()
16790 .column(u_col)
16791 .from(aliased_unnest)
16792 .where_(where_cond),
16793 ));
16794 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
16795 Ok(Expression::ArrayFunc(Box::new(
16796 crate::expressions::ArrayConstructor {
16797 expressions: vec![subquery],
16798 bracket_notation: false,
16799 use_list_keyword: false,
16800 },
16801 )))
16802 }
16803 _ => Ok(Expression::Function(f)),
16804 }
16805 }
16806 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
16807 "PARSE_JSON" if f.args.len() == 1 => {
16808 match target {
16809 DialectType::SQLite
16810 | DialectType::Doris
16811 | DialectType::MySQL
16812 | DialectType::StarRocks => {
16813 // Strip PARSE_JSON, return the inner argument
16814 Ok(f.args.into_iter().next().unwrap())
16815 }
16816 _ => Ok(Expression::Function(f)),
16817 }
16818 }
16819 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
16820 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
16821 "JSON_REMOVE" => Ok(Expression::Function(f)),
16822 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
16823 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
16824 "JSON_SET" => Ok(Expression::Function(f)),
16825 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
16826 // Behavior per search value type:
16827 // NULL literal -> CASE WHEN x IS NULL THEN result
16828 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
16829 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
16830 "DECODE" if f.args.len() >= 3 => {
16831 // Keep as DECODE for targets that support it natively
16832 let keep_as_decode = matches!(
16833 target,
16834 DialectType::Oracle
16835 | DialectType::Snowflake
16836 | DialectType::Redshift
16837 | DialectType::Teradata
16838 | DialectType::Spark
16839 | DialectType::Databricks
16840 );
16841 if keep_as_decode {
16842 return Ok(Expression::Function(f));
16843 }
16844
16845 let mut args = f.args;
16846 let this_expr = args.remove(0);
16847 let mut pairs = Vec::new();
16848 let mut default = None;
16849 let mut i = 0;
16850 while i + 1 < args.len() {
16851 pairs.push((args[i].clone(), args[i + 1].clone()));
16852 i += 2;
16853 }
16854 if i < args.len() {
16855 default = Some(args[i].clone());
16856 }
16857 // Helper: check if expression is a literal value
16858 fn is_literal(e: &Expression) -> bool {
16859 matches!(
16860 e,
16861 Expression::Literal(_)
16862 | Expression::Boolean(_)
16863 | Expression::Neg(_)
16864 )
16865 }
16866 let whens: Vec<(Expression, Expression)> = pairs
16867 .into_iter()
16868 .map(|(search, result)| {
16869 if matches!(&search, Expression::Null(_)) {
16870 // NULL search -> IS NULL
16871 let condition = Expression::Is(Box::new(BinaryOp {
16872 left: this_expr.clone(),
16873 right: Expression::Null(crate::expressions::Null),
16874 left_comments: Vec::new(),
16875 operator_comments: Vec::new(),
16876 trailing_comments: Vec::new(),
16877 }));
16878 (condition, result)
16879 } else if is_literal(&search) {
16880 // Literal search -> simple equality
16881 let eq = Expression::Eq(Box::new(BinaryOp {
16882 left: this_expr.clone(),
16883 right: search,
16884 left_comments: Vec::new(),
16885 operator_comments: Vec::new(),
16886 trailing_comments: Vec::new(),
16887 }));
16888 (eq, result)
16889 } else {
16890 // Non-literal (column ref, expression) -> null-safe comparison
16891 let needs_paren = matches!(
16892 &search,
16893 Expression::Eq(_)
16894 | Expression::Neq(_)
16895 | Expression::Gt(_)
16896 | Expression::Gte(_)
16897 | Expression::Lt(_)
16898 | Expression::Lte(_)
16899 );
16900 let search_for_eq = if needs_paren {
16901 Expression::Paren(Box::new(
16902 crate::expressions::Paren {
16903 this: search.clone(),
16904 trailing_comments: Vec::new(),
16905 },
16906 ))
16907 } else {
16908 search.clone()
16909 };
16910 let eq = Expression::Eq(Box::new(BinaryOp {
16911 left: this_expr.clone(),
16912 right: search_for_eq,
16913 left_comments: Vec::new(),
16914 operator_comments: Vec::new(),
16915 trailing_comments: Vec::new(),
16916 }));
16917 let search_for_null = if needs_paren {
16918 Expression::Paren(Box::new(
16919 crate::expressions::Paren {
16920 this: search.clone(),
16921 trailing_comments: Vec::new(),
16922 },
16923 ))
16924 } else {
16925 search.clone()
16926 };
16927 let x_is_null = Expression::Is(Box::new(BinaryOp {
16928 left: this_expr.clone(),
16929 right: Expression::Null(crate::expressions::Null),
16930 left_comments: Vec::new(),
16931 operator_comments: Vec::new(),
16932 trailing_comments: Vec::new(),
16933 }));
16934 let s_is_null = Expression::Is(Box::new(BinaryOp {
16935 left: search_for_null,
16936 right: Expression::Null(crate::expressions::Null),
16937 left_comments: Vec::new(),
16938 operator_comments: Vec::new(),
16939 trailing_comments: Vec::new(),
16940 }));
16941 let both_null = Expression::And(Box::new(BinaryOp {
16942 left: x_is_null,
16943 right: s_is_null,
16944 left_comments: Vec::new(),
16945 operator_comments: Vec::new(),
16946 trailing_comments: Vec::new(),
16947 }));
16948 let condition = Expression::Or(Box::new(BinaryOp {
16949 left: eq,
16950 right: Expression::Paren(Box::new(
16951 crate::expressions::Paren {
16952 this: both_null,
16953 trailing_comments: Vec::new(),
16954 },
16955 )),
16956 left_comments: Vec::new(),
16957 operator_comments: Vec::new(),
16958 trailing_comments: Vec::new(),
16959 }));
16960 (condition, result)
16961 }
16962 })
16963 .collect();
16964 Ok(Expression::Case(Box::new(Case {
16965 operand: None,
16966 whens,
16967 else_: default,
16968 comments: Vec::new(),
16969 })))
16970 }
16971 // LEVENSHTEIN(a, b, ...) -> dialect-specific
16972 "LEVENSHTEIN" => {
16973 match target {
16974 DialectType::BigQuery => {
16975 let mut new_f = *f;
16976 new_f.name = "EDIT_DISTANCE".to_string();
16977 Ok(Expression::Function(Box::new(new_f)))
16978 }
16979 DialectType::Drill => {
16980 let mut new_f = *f;
16981 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
16982 Ok(Expression::Function(Box::new(new_f)))
16983 }
16984 DialectType::PostgreSQL if f.args.len() == 6 => {
16985 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
16986 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
16987 let mut new_f = *f;
16988 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
16989 Ok(Expression::Function(Box::new(new_f)))
16990 }
16991 _ => Ok(Expression::Function(f)),
16992 }
16993 }
16994 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
16995 "ARRAY_REVERSE" => match target {
16996 DialectType::ClickHouse => {
16997 let mut new_f = *f;
16998 new_f.name = "arrayReverse".to_string();
16999 Ok(Expression::Function(Box::new(new_f)))
17000 }
17001 _ => Ok(Expression::Function(f)),
17002 },
17003 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17004 "GENERATE_DATE_ARRAY" => {
17005 let mut args = f.args;
17006 if matches!(target, DialectType::BigQuery) {
17007 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17008 if args.len() == 2 {
17009 let default_interval = Expression::Interval(Box::new(
17010 crate::expressions::Interval {
17011 this: Some(Expression::Literal(Literal::String(
17012 "1".to_string(),
17013 ))),
17014 unit: Some(
17015 crate::expressions::IntervalUnitSpec::Simple {
17016 unit: crate::expressions::IntervalUnit::Day,
17017 use_plural: false,
17018 },
17019 ),
17020 },
17021 ));
17022 args.push(default_interval);
17023 }
17024 Ok(Expression::Function(Box::new(Function::new(
17025 "GENERATE_DATE_ARRAY".to_string(),
17026 args,
17027 ))))
17028 } else if matches!(target, DialectType::DuckDB) {
17029 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17030 let start = args.get(0).cloned();
17031 let end = args.get(1).cloned();
17032 let step = args.get(2).cloned().or_else(|| {
17033 Some(Expression::Interval(Box::new(
17034 crate::expressions::Interval {
17035 this: Some(Expression::Literal(Literal::String(
17036 "1".to_string(),
17037 ))),
17038 unit: Some(
17039 crate::expressions::IntervalUnitSpec::Simple {
17040 unit: crate::expressions::IntervalUnit::Day,
17041 use_plural: false,
17042 },
17043 ),
17044 },
17045 )))
17046 });
17047 let gen_series = Expression::GenerateSeries(Box::new(
17048 crate::expressions::GenerateSeries {
17049 start: start.map(Box::new),
17050 end: end.map(Box::new),
17051 step: step.map(Box::new),
17052 is_end_exclusive: None,
17053 },
17054 ));
17055 Ok(Expression::Cast(Box::new(Cast {
17056 this: gen_series,
17057 to: DataType::Array {
17058 element_type: Box::new(DataType::Date),
17059 dimension: None,
17060 },
17061 trailing_comments: vec![],
17062 double_colon_syntax: false,
17063 format: None,
17064 default: None,
17065 })))
17066 } else if matches!(
17067 target,
17068 DialectType::Presto | DialectType::Trino | DialectType::Athena
17069 ) {
17070 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17071 let start = args.get(0).cloned();
17072 let end = args.get(1).cloned();
17073 let step = args.get(2).cloned().or_else(|| {
17074 Some(Expression::Interval(Box::new(
17075 crate::expressions::Interval {
17076 this: Some(Expression::Literal(Literal::String(
17077 "1".to_string(),
17078 ))),
17079 unit: Some(
17080 crate::expressions::IntervalUnitSpec::Simple {
17081 unit: crate::expressions::IntervalUnit::Day,
17082 use_plural: false,
17083 },
17084 ),
17085 },
17086 )))
17087 });
17088 let gen_series = Expression::GenerateSeries(Box::new(
17089 crate::expressions::GenerateSeries {
17090 start: start.map(Box::new),
17091 end: end.map(Box::new),
17092 step: step.map(Box::new),
17093 is_end_exclusive: None,
17094 },
17095 ));
17096 Ok(gen_series)
17097 } else if matches!(
17098 target,
17099 DialectType::Spark | DialectType::Databricks
17100 ) {
17101 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17102 let start = args.get(0).cloned();
17103 let end = args.get(1).cloned();
17104 let step = args.get(2).cloned().or_else(|| {
17105 Some(Expression::Interval(Box::new(
17106 crate::expressions::Interval {
17107 this: Some(Expression::Literal(Literal::String(
17108 "1".to_string(),
17109 ))),
17110 unit: Some(
17111 crate::expressions::IntervalUnitSpec::Simple {
17112 unit: crate::expressions::IntervalUnit::Day,
17113 use_plural: false,
17114 },
17115 ),
17116 },
17117 )))
17118 });
17119 let gen_series = Expression::GenerateSeries(Box::new(
17120 crate::expressions::GenerateSeries {
17121 start: start.map(Box::new),
17122 end: end.map(Box::new),
17123 step: step.map(Box::new),
17124 is_end_exclusive: None,
17125 },
17126 ));
17127 Ok(gen_series)
17128 } else if matches!(target, DialectType::Snowflake) {
17129 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17130 if args.len() == 2 {
17131 let default_interval = Expression::Interval(Box::new(
17132 crate::expressions::Interval {
17133 this: Some(Expression::Literal(Literal::String(
17134 "1".to_string(),
17135 ))),
17136 unit: Some(
17137 crate::expressions::IntervalUnitSpec::Simple {
17138 unit: crate::expressions::IntervalUnit::Day,
17139 use_plural: false,
17140 },
17141 ),
17142 },
17143 ));
17144 args.push(default_interval);
17145 }
17146 Ok(Expression::Function(Box::new(Function::new(
17147 "GENERATE_DATE_ARRAY".to_string(),
17148 args,
17149 ))))
17150 } else if matches!(
17151 target,
17152 DialectType::MySQL
17153 | DialectType::TSQL
17154 | DialectType::Fabric
17155 | DialectType::Redshift
17156 ) {
17157 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17158 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17159 Ok(Expression::Function(Box::new(Function::new(
17160 "GENERATE_DATE_ARRAY".to_string(),
17161 args,
17162 ))))
17163 } else {
17164 // PostgreSQL/others: convert to GenerateSeries
17165 let start = args.get(0).cloned();
17166 let end = args.get(1).cloned();
17167 let step = args.get(2).cloned().or_else(|| {
17168 Some(Expression::Interval(Box::new(
17169 crate::expressions::Interval {
17170 this: Some(Expression::Literal(Literal::String(
17171 "1".to_string(),
17172 ))),
17173 unit: Some(
17174 crate::expressions::IntervalUnitSpec::Simple {
17175 unit: crate::expressions::IntervalUnit::Day,
17176 use_plural: false,
17177 },
17178 ),
17179 },
17180 )))
17181 });
17182 Ok(Expression::GenerateSeries(Box::new(
17183 crate::expressions::GenerateSeries {
17184 start: start.map(Box::new),
17185 end: end.map(Box::new),
17186 step: step.map(Box::new),
17187 is_end_exclusive: None,
17188 },
17189 )))
17190 }
17191 }
17192 _ => Ok(Expression::Function(f)),
17193 }
17194 } else if let Expression::AggregateFunction(mut af) = e {
17195 let name = af.name.to_uppercase();
17196 match name.as_str() {
17197 "ARBITRARY" if af.args.len() == 1 => {
17198 let arg = af.args.into_iter().next().unwrap();
17199 Ok(convert_arbitrary(arg, target))
17200 }
17201 "JSON_ARRAYAGG" => {
17202 match target {
17203 DialectType::PostgreSQL => {
17204 af.name = "JSON_AGG".to_string();
17205 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17206 for ordered in af.order_by.iter_mut() {
17207 if ordered.nulls_first.is_none() {
17208 ordered.nulls_first = Some(true);
17209 }
17210 }
17211 Ok(Expression::AggregateFunction(af))
17212 }
17213 _ => Ok(Expression::AggregateFunction(af)),
17214 }
17215 }
17216 _ => Ok(Expression::AggregateFunction(af)),
17217 }
17218 } else if let Expression::JSONArrayAgg(ja) = e {
17219 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
17220 match target {
17221 DialectType::PostgreSQL => {
17222 let mut order_by = Vec::new();
17223 if let Some(order_expr) = ja.order {
17224 if let Expression::OrderBy(ob) = *order_expr {
17225 for mut ordered in ob.expressions {
17226 if ordered.nulls_first.is_none() {
17227 ordered.nulls_first = Some(true);
17228 }
17229 order_by.push(ordered);
17230 }
17231 }
17232 }
17233 Ok(Expression::AggregateFunction(Box::new(
17234 crate::expressions::AggregateFunction {
17235 name: "JSON_AGG".to_string(),
17236 args: vec![*ja.this],
17237 distinct: false,
17238 filter: None,
17239 order_by,
17240 limit: None,
17241 ignore_nulls: None,
17242 },
17243 )))
17244 }
17245 _ => Ok(Expression::JSONArrayAgg(ja)),
17246 }
17247 } else if let Expression::ToNumber(tn) = e {
17248 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17249 let arg = *tn.this;
17250 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17251 this: arg,
17252 to: crate::expressions::DataType::Double {
17253 precision: None,
17254 scale: None,
17255 },
17256 double_colon_syntax: false,
17257 trailing_comments: Vec::new(),
17258 format: None,
17259 default: None,
17260 })))
17261 } else {
17262 Ok(e)
17263 }
17264 }
17265
17266 Action::RegexpLikeToDuckDB => {
17267 if let Expression::RegexpLike(f) = e {
17268 let mut args = vec![f.this, f.pattern];
17269 if let Some(flags) = f.flags {
17270 args.push(flags);
17271 }
17272 Ok(Expression::Function(Box::new(Function::new(
17273 "REGEXP_MATCHES".to_string(),
17274 args,
17275 ))))
17276 } else {
17277 Ok(e)
17278 }
17279 }
Action::EpochConvert => {
    // EPOCH(ts) (seconds since the Unix epoch): rewrite to the target's
    // native function name; the single argument is passed through untouched.
    if let Expression::Epoch(f) = e {
        let arg = f.this;
        let name = match target {
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                "UNIX_TIMESTAMP"
            }
            DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
            // NOTE(review): BigQuery's documented epoch-extraction functions
            // are UNIX_SECONDS / UNIX_MILLIS; "TIME_TO_UNIX" does not appear
            // in the BigQuery reference — confirm this mapping is intended.
            DialectType::BigQuery => "TIME_TO_UNIX",
            // Default (e.g. DuckDB) keeps EPOCH.
            _ => "EPOCH",
        };
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![arg],
        ))))
    } else {
        Ok(e)
    }
}
17299 Action::EpochMsConvert => {
17300 use crate::expressions::{BinaryOp, Cast};
17301 if let Expression::EpochMs(f) = e {
17302 let arg = f.this;
17303 match target {
17304 DialectType::Spark | DialectType::Databricks => {
17305 Ok(Expression::Function(Box::new(Function::new(
17306 "TIMESTAMP_MILLIS".to_string(),
17307 vec![arg],
17308 ))))
17309 }
17310 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17311 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
17312 ))),
17313 DialectType::Presto | DialectType::Trino => {
17314 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
17315 let cast_arg = Expression::Cast(Box::new(Cast {
17316 this: arg,
17317 to: DataType::Double {
17318 precision: None,
17319 scale: None,
17320 },
17321 trailing_comments: Vec::new(),
17322 double_colon_syntax: false,
17323 format: None,
17324 default: None,
17325 }));
17326 let div = Expression::Div(Box::new(BinaryOp::new(
17327 cast_arg,
17328 Expression::Function(Box::new(Function::new(
17329 "POW".to_string(),
17330 vec![Expression::number(10), Expression::number(3)],
17331 ))),
17332 )));
17333 Ok(Expression::Function(Box::new(Function::new(
17334 "FROM_UNIXTIME".to_string(),
17335 vec![div],
17336 ))))
17337 }
17338 DialectType::MySQL => {
17339 // FROM_UNIXTIME(x / POWER(10, 3))
17340 let div = Expression::Div(Box::new(BinaryOp::new(
17341 arg,
17342 Expression::Function(Box::new(Function::new(
17343 "POWER".to_string(),
17344 vec![Expression::number(10), Expression::number(3)],
17345 ))),
17346 )));
17347 Ok(Expression::Function(Box::new(Function::new(
17348 "FROM_UNIXTIME".to_string(),
17349 vec![div],
17350 ))))
17351 }
17352 DialectType::PostgreSQL | DialectType::Redshift => {
17353 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
17354 let cast_arg = Expression::Cast(Box::new(Cast {
17355 this: arg,
17356 to: DataType::Custom {
17357 name: "DOUBLE PRECISION".to_string(),
17358 },
17359 trailing_comments: Vec::new(),
17360 double_colon_syntax: false,
17361 format: None,
17362 default: None,
17363 }));
17364 let div = Expression::Div(Box::new(BinaryOp::new(
17365 cast_arg,
17366 Expression::Function(Box::new(Function::new(
17367 "POWER".to_string(),
17368 vec![Expression::number(10), Expression::number(3)],
17369 ))),
17370 )));
17371 Ok(Expression::Function(Box::new(Function::new(
17372 "TO_TIMESTAMP".to_string(),
17373 vec![div],
17374 ))))
17375 }
17376 DialectType::ClickHouse => {
17377 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
17378 let cast_arg = Expression::Cast(Box::new(Cast {
17379 this: arg,
17380 to: DataType::Nullable {
17381 inner: Box::new(DataType::BigInt { length: None }),
17382 },
17383 trailing_comments: Vec::new(),
17384 double_colon_syntax: false,
17385 format: None,
17386 default: None,
17387 }));
17388 Ok(Expression::Function(Box::new(Function::new(
17389 "fromUnixTimestamp64Milli".to_string(),
17390 vec![cast_arg],
17391 ))))
17392 }
17393 _ => Ok(Expression::Function(Box::new(Function::new(
17394 "EPOCH_MS".to_string(),
17395 vec![arg],
17396 )))),
17397 }
17398 } else {
17399 Ok(e)
17400 }
17401 }
Action::TSQLTypeNormalize => {
    // Normalize TSQL-specific type spellings (parsed as DataType::Custom or
    // carrying TSQL-only flags) into portable DataType variants for the
    // target dialect. Types needing no change exit early via `return Ok(..)`.
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY (fixed-point currency) -> DECIMAL(15, 4).
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal {
                    precision: Some(15),
                    scale: Some(4),
                }
            }
            // SMALLMONEY (narrow currency) -> DECIMAL(6, 4).
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("SMALLMONEY") =>
            {
                DataType::Decimal {
                    precision: Some(6),
                    scale: Some(4),
                }
            }
            // DATETIME2 (no precision suffix) -> plain TIMESTAMP.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            // REAL -> FLOAT spelling.
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                }
            }
            // A Float parsed with the REAL spelling: clear the spelling flag
            // (note: any precision/scale are intentionally dropped here).
            DataType::Float {
                real_spelling: true,
                ..
            } => DataType::Float {
                precision: None,
                scale: None,
                real_spelling: false,
            },
            // IMAGE (legacy binary) -> BLOB.
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom {
                    name: "BLOB".to_string(),
                }
            }
            // BIT -> BOOLEAN.
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            // ROWVERSION -> BINARY.
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("ROWVERSION") =>
            {
                DataType::Custom {
                    name: "BINARY".to_string(),
                }
            }
            // UNIQUEIDENTIFIER (GUID): STRING on Spark-family targets,
            // VARCHAR(36) elsewhere (36 chars fits the canonical GUID text).
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::VarChar {
                        length: Some(36),
                        parenthesized_length: true,
                    },
                }
            }
            // DATETIMEOFFSET: Spark-family targets get plain TIMESTAMP,
            // others get TIMESTAMP WITH TIME ZONE.
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => DataType::Timestamp {
                        timezone: true,
                        precision: None,
                    },
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("DATETIME2(") =>
            {
                // DATETIME2(n) -> TIMESTAMP
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("TIME(") =>
            {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("NUMERIC") =>
            {
                // Parse NUMERIC(p,s) back to Decimal(p,s)
                let upper = name.to_uppercase();
                if let Some(inner) = upper
                    .strip_prefix("NUMERIC(")
                    .and_then(|s| s.strip_suffix(')'))
                {
                    // Unparsable precision/scale pieces become None.
                    let parts: Vec<&str> = inner.split(',').collect();
                    let precision =
                        parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale =
                        parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal {
                        precision: None,
                        scale: None,
                    }
                } else {
                    // Other NUMERIC-prefixed spellings: leave untouched.
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float {
                precision: Some(p), ..
            } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float {
                        precision: None,
                        scale: None,
                        real_spelling: false,
                    }
                } else {
                    DataType::Double {
                        precision: None,
                        scale: None,
                    }
                }
            }
            // TINYINT: DuckDB gets UTINYINT, Spark-family gets SMALLINT —
            // presumably to preserve TSQL's unsigned 0..255 range (TODO confirm).
            DataType::TinyInt { .. } => match target {
                DialectType::DuckDB => DataType::Custom {
                    name: "UTINYINT".to_string(),
                },
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => DataType::SmallInt { length: None },
                _ => return Ok(Expression::DataType(dt)),
            },
            // INTEGER -> INT for Spark/Databricks
            DataType::Int {
                length,
                integer_spelling: true,
            } => DataType::Int {
                length: *length,
                integer_spelling: false,
            },
            // Anything else passes through unchanged.
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
17581 Action::MySQLSafeDivide => {
17582 use crate::expressions::{BinaryOp, Cast};
17583 if let Expression::Div(op) = e {
17584 let left = op.left;
17585 let right = op.right;
17586 // For SQLite: CAST left as REAL but NO NULLIF wrapping
17587 if matches!(target, DialectType::SQLite) {
17588 let new_left = Expression::Cast(Box::new(Cast {
17589 this: left,
17590 to: DataType::Float {
17591 precision: None,
17592 scale: None,
17593 real_spelling: true,
17594 },
17595 trailing_comments: Vec::new(),
17596 double_colon_syntax: false,
17597 format: None,
17598 default: None,
17599 }));
17600 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
17601 }
17602 // Wrap right in NULLIF(right, 0)
17603 let nullif_right = Expression::Function(Box::new(Function::new(
17604 "NULLIF".to_string(),
17605 vec![right, Expression::number(0)],
17606 )));
17607 // For some dialects, also CAST the left side
17608 let new_left = match target {
17609 DialectType::PostgreSQL
17610 | DialectType::Redshift
17611 | DialectType::Teradata => Expression::Cast(Box::new(Cast {
17612 this: left,
17613 to: DataType::Custom {
17614 name: "DOUBLE PRECISION".to_string(),
17615 },
17616 trailing_comments: Vec::new(),
17617 double_colon_syntax: false,
17618 format: None,
17619 default: None,
17620 })),
17621 DialectType::Drill | DialectType::Trino | DialectType::Presto => {
17622 Expression::Cast(Box::new(Cast {
17623 this: left,
17624 to: DataType::Double {
17625 precision: None,
17626 scale: None,
17627 },
17628 trailing_comments: Vec::new(),
17629 double_colon_syntax: false,
17630 format: None,
17631 default: None,
17632 }))
17633 }
17634 DialectType::TSQL => Expression::Cast(Box::new(Cast {
17635 this: left,
17636 to: DataType::Float {
17637 precision: None,
17638 scale: None,
17639 real_spelling: false,
17640 },
17641 trailing_comments: Vec::new(),
17642 double_colon_syntax: false,
17643 format: None,
17644 default: None,
17645 })),
17646 _ => left,
17647 };
17648 Ok(Expression::Div(Box::new(BinaryOp::new(
17649 new_left,
17650 nullif_right,
17651 ))))
17652 } else {
17653 Ok(e)
17654 }
17655 }
17656 Action::AlterTableRenameStripSchema => {
17657 if let Expression::AlterTable(mut at) = e {
17658 if let Some(crate::expressions::AlterTableAction::RenameTable(
17659 ref mut new_tbl,
17660 )) = at.actions.first_mut()
17661 {
17662 new_tbl.schema = None;
17663 new_tbl.catalog = None;
17664 }
17665 Ok(Expression::AlterTable(at))
17666 } else {
17667 Ok(e)
17668 }
17669 }
17670 Action::NullsOrdering => {
17671 // Fill in the source dialect's implied null ordering default.
17672 // This makes implicit null ordering explicit so the target generator
17673 // can correctly strip or keep it.
17674 //
17675 // Dialect null ordering categories:
17676 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
17677 // ASC -> NULLS LAST, DESC -> NULLS FIRST
17678 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
17679 // ASC -> NULLS FIRST, DESC -> NULLS LAST
17680 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
17681 // NULLS LAST always (both ASC and DESC)
17682 if let Expression::Ordered(mut o) = e {
17683 let is_asc = !o.desc;
17684
17685 let is_source_nulls_large = matches!(
17686 source,
17687 DialectType::Oracle
17688 | DialectType::PostgreSQL
17689 | DialectType::Redshift
17690 | DialectType::Snowflake
17691 );
17692 let is_source_nulls_last = matches!(
17693 source,
17694 DialectType::DuckDB
17695 | DialectType::Presto
17696 | DialectType::Trino
17697 | DialectType::Dremio
17698 | DialectType::Athena
17699 | DialectType::ClickHouse
17700 | DialectType::Drill
17701 | DialectType::Exasol
17702 | DialectType::DataFusion
17703 );
17704
17705 // Determine target category to check if default matches
17706 let is_target_nulls_large = matches!(
17707 target,
17708 DialectType::Oracle
17709 | DialectType::PostgreSQL
17710 | DialectType::Redshift
17711 | DialectType::Snowflake
17712 );
17713 let is_target_nulls_last = matches!(
17714 target,
17715 DialectType::DuckDB
17716 | DialectType::Presto
17717 | DialectType::Trino
17718 | DialectType::Dremio
17719 | DialectType::Athena
17720 | DialectType::ClickHouse
17721 | DialectType::Drill
17722 | DialectType::Exasol
17723 | DialectType::DataFusion
17724 );
17725
17726 // Compute the implied nulls_first for source
17727 let source_nulls_first = if is_source_nulls_large {
17728 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
17729 } else if is_source_nulls_last {
17730 false // NULLS LAST always
17731 } else {
17732 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
17733 };
17734
17735 // Compute the target's default
17736 let target_nulls_first = if is_target_nulls_large {
17737 !is_asc
17738 } else if is_target_nulls_last {
17739 false
17740 } else {
17741 is_asc
17742 };
17743
17744 // Only add explicit nulls ordering if source and target defaults differ
17745 if source_nulls_first != target_nulls_first {
17746 o.nulls_first = Some(source_nulls_first);
17747 }
17748 // If they match, leave nulls_first as None so the generator won't output it
17749
17750 Ok(Expression::Ordered(o))
17751 } else {
17752 Ok(e)
17753 }
17754 }
Action::StringAggConvert => {
    // Convert STRING_AGG (in its several parsed shapes) to the target
    // dialect's string-aggregation construct.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af)
                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                        && af.args.len() >= 2 =>
                {
                    (
                        Some(af.args[0].clone()),
                        Some(af.args[1].clone()),
                        af.distinct,
                    )
                }
                Expression::Function(ref f)
                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                        && f.args.len() >= 2 =>
                {
                    // Plain Function nodes carry no DISTINCT flag.
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                // Not a STRING_AGG we recognize.
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(
                            crate::expressions::WithinGroup {
                                this: Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                        distinct,
                                        filter: None,
                                        limit: None,
                                    },
                                )),
                                order_by,
                            },
                        )))
                    }
                    DialectType::MySQL
                    | DialectType::SingleStore
                    | DialectType::Doris
                    | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                            },
                        )))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None,
                                distinct,
                                filter: None,
                            },
                        )))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                            },
                        )))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                            },
                        )))
                    }
                }
            } else {
                // Unrecognized WITHIN GROUP body: leave the node untouched.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            // Bare STRING_AGG (no WITHIN GROUP clause).
            match target {
                DialectType::MySQL
                | DialectType::SingleStore
                | DialectType::Doris
                | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: sa.filter,
                        },
                    )))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                            distinct: sa.distinct,
                            filter: sa.filter,
                        },
                    )))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(
                        crate::expressions::ListAggFunc {
                            this: sa.this,
                            separator: sa.separator,
                            on_overflow: None,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: None,
                        },
                    )))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
Action::GroupConcatConvert => {
    // Convert GROUP_CONCAT(...) to the target dialect's string-aggregation
    // construct. The local helpers rewrite a multi-arg CONCAT argument into
    // the concatenation form the target prefers.
    // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
    // or CONCAT(a, b, c) -> a + b + c (for TSQL)
    fn expand_concat_to_dpipe(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                // Left-fold the args into nested `||` nodes.
                let mut result = f.args[0].clone();
                for arg in &f.args[1..] {
                    result = Expression::Concat(Box::new(BinaryOp {
                        left: result,
                        right: arg.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                    }));
                }
                return result;
            }
        }
        expr
    }
    // Same fold as above, but TSQL concatenates strings with `+`.
    fn expand_concat_to_plus(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                let mut result = f.args[0].clone();
                for arg in &f.args[1..] {
                    result = Expression::Add(Box::new(BinaryOp {
                        left: result,
                        right: arg.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                    }));
                }
                return result;
            }
        }
        expr
    }
    // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
    fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                let new_args: Vec<Expression> = f
                    .args
                    .iter()
                    .map(|arg| {
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: arg.clone(),
                            to: crate::expressions::DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    })
                    .collect();
                return Expression::Function(Box::new(
                    crate::expressions::Function::new(
                        "CONCAT".to_string(),
                        new_args,
                    ),
                ));
            }
        }
        expr
    }
    if let Expression::GroupConcat(gc) = e {
        match target {
            DialectType::Presto => {
                // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                let this = wrap_concat_args_in_varchar_cast(gc.this);
                let array_agg =
                    Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                        this,
                        distinct: gc.distinct,
                        filter: gc.filter,
                        order_by: gc.order_by.unwrap_or_default(),
                        name: None,
                        ignore_nulls: None,
                        having_max: None,
                        limit: None,
                    }));
                Ok(Expression::ArrayJoin(Box::new(
                    crate::expressions::ArrayJoinFunc {
                        this: array_agg,
                        separator: sep,
                        null_replacement: None,
                    },
                )))
            }
            DialectType::Trino => {
                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                let this = wrap_concat_args_in_varchar_cast(gc.this);
                Ok(Expression::ListAgg(Box::new(
                    crate::expressions::ListAggFunc {
                        this,
                        separator: Some(sep),
                        on_overflow: None,
                        order_by: gc.order_by,
                        distinct: gc.distinct,
                        filter: gc.filter,
                    },
                )))
            }
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Snowflake
            | DialectType::DuckDB
            | DialectType::Hive
            | DialectType::ClickHouse => {
                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // Expand CONCAT(a,b,c) -> a || b || c for || dialects
                let this = expand_concat_to_dpipe(gc.this);
                // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
                let order_by = if target == DialectType::PostgreSQL {
                    gc.order_by.map(|ords| {
                        ords.into_iter()
                            .map(|mut o| {
                                if o.nulls_first.is_none() {
                                    if o.desc {
                                        o.nulls_first = Some(false);
                                        // NULLS LAST
                                    } else {
                                        o.nulls_first = Some(true);
                                        // NULLS FIRST
                                    }
                                }
                                o
                            })
                            .collect()
                    })
                } else {
                    gc.order_by
                };
                Ok(Expression::StringAgg(Box::new(
                    crate::expressions::StringAggFunc {
                        this,
                        separator: Some(sep),
                        order_by,
                        distinct: gc.distinct,
                        filter: gc.filter,
                        limit: None,
                    },
                )))
            }
            DialectType::TSQL => {
                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
                // TSQL doesn't support DISTINCT in STRING_AGG
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // Expand CONCAT(a,b,c) -> a + b + c for TSQL
                let this = expand_concat_to_plus(gc.this);
                Ok(Expression::StringAgg(Box::new(
                    crate::expressions::StringAggFunc {
                        this,
                        separator: Some(sep),
                        order_by: gc.order_by,
                        distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                        filter: gc.filter,
                        limit: None,
                    },
                )))
            }
            DialectType::SQLite => {
                // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
                // SQLite GROUP_CONCAT doesn't support ORDER BY
                // Expand CONCAT(a,b,c) -> a || b || c
                let this = expand_concat_to_dpipe(gc.this);
                Ok(Expression::GroupConcat(Box::new(
                    crate::expressions::GroupConcatFunc {
                        this,
                        separator: gc.separator,
                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                        distinct: gc.distinct,
                        filter: gc.filter,
                    },
                )))
            }
            DialectType::Spark | DialectType::Databricks => {
                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                Ok(Expression::ListAgg(Box::new(
                    crate::expressions::ListAggFunc {
                        this: gc.this,
                        separator: Some(sep),
                        on_overflow: None,
                        order_by: gc.order_by,
                        distinct: gc.distinct,
                        filter: None,
                    },
                )))
            }
            DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::StarRocks => {
                // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
                // NOTE(review): StringAggConvert groups Doris with this family,
                // but Doris falls through to the default arm here (no default
                // separator added) — confirm this asymmetry is intentional.
                if gc.separator.is_none() {
                    let mut gc = gc;
                    gc.separator = Some(Expression::string(","));
                    Ok(Expression::GroupConcat(gc))
                } else {
                    Ok(Expression::GroupConcat(gc))
                }
            }
            _ => Ok(Expression::GroupConcat(gc)),
        }
    } else {
        Ok(e)
    }
}
18128 Action::TempTableHash => {
18129 match e {
18130 Expression::CreateTable(mut ct) => {
18131 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18132 let name = &ct.name.name.name;
18133 if name.starts_with('#') {
18134 ct.name.name.name = name.trim_start_matches('#').to_string();
18135 }
18136 // Set temporary flag
18137 ct.temporary = true;
18138 Ok(Expression::CreateTable(ct))
18139 }
18140 Expression::Table(mut tr) => {
18141 // Strip # from table references
18142 let name = &tr.name.name;
18143 if name.starts_with('#') {
18144 tr.name.name = name.trim_start_matches('#').to_string();
18145 }
18146 Ok(Expression::Table(tr))
18147 }
18148 Expression::DropTable(mut dt) => {
18149 // Strip # from DROP TABLE names
18150 for table_ref in &mut dt.names {
18151 if table_ref.name.name.starts_with('#') {
18152 table_ref.name.name =
18153 table_ref.name.name.trim_start_matches('#').to_string();
18154 }
18155 }
18156 Ok(Expression::DropTable(dt))
18157 }
18158 _ => Ok(e),
18159 }
18160 }
18161 Action::NvlClearOriginal => {
18162 if let Expression::Nvl(mut f) = e {
18163 f.original_name = None;
18164 Ok(Expression::Nvl(f))
18165 } else {
18166 Ok(e)
18167 }
18168 }
            Action::HiveCastToTryCast => {
                // Convert Hive/Spark CAST to TRY_CAST for targets that support it.
                // Before re-tagging the node, two source/target-specific fixups
                // adjust the cast's target type in place.
                if let Expression::Cast(mut c) = e {
                    // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
                    // (Spark's TIMESTAMP is always timezone-aware)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Spark | DialectType::Databricks)
                        && matches!(
                            c.to,
                            DataType::Timestamp {
                                timezone: false,
                                ..
                            }
                        )
                    {
                        // DataType::Custom keeps the exact "TIMESTAMPTZ" spelling
                        // in generated output.
                        c.to = DataType::Custom {
                            name: "TIMESTAMPTZ".to_string(),
                        };
                    }
                    // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
                    // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
                    if matches!(target, DialectType::Databricks | DialectType::Spark)
                        && matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        )
                        && Self::has_varchar_char_type(&c.to)
                    {
                        c.to = Self::normalize_varchar_to_string(c.to);
                    }
                    // Same payload, different node kind: CAST -> TRY_CAST.
                    Ok(Expression::TryCast(c))
                } else {
                    Ok(e)
                }
            }
            Action::XorExpand => {
                // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                // Snowflake: use BOOLXOR(a, b) instead
                if let Expression::Xor(xor) = e {
                    // Collect all XOR operands (this/expression plus any extra
                    // chained expressions) into one flat list.
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake: use BOOLXOR(a, b) — only for the exact 2-operand
                    // case; other arities fall through to the generic expansion.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Helper to build (a AND NOT b) OR (NOT a AND b).
                    // Operands are cloned because each appears twice (once
                    // negated, once plain); every sub-expression is wrapped in
                    // Paren to force unambiguous precedence in the output.
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }))
                    };

                    if operands.len() >= 2 {
                        // Left-fold the expansion over 3+ operands:
                        // xor(xor(a, b), c), ...
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Degenerate single-operand XOR is just the operand.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
18289 Action::DatePartUnquote => {
18290 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18291 // Convert the quoted string first arg to a bare Column/Identifier
18292 if let Expression::Function(mut f) = e {
18293 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18294 f.args.first()
18295 {
18296 let bare_name = s.to_lowercase();
18297 f.args[0] = Expression::Column(crate::expressions::Column {
18298 name: Identifier::new(bare_name),
18299 table: None,
18300 join_mark: false,
18301 trailing_comments: Vec::new(),
18302 });
18303 }
18304 Ok(Expression::Function(f))
18305 } else {
18306 Ok(e)
18307 }
18308 }
18309 Action::ArrayLengthConvert => {
18310 // Extract the argument from the expression
18311 let arg = match e {
18312 Expression::Cardinality(ref f) => f.this.clone(),
18313 Expression::ArrayLength(ref f) => f.this.clone(),
18314 Expression::ArraySize(ref f) => f.this.clone(),
18315 _ => return Ok(e),
18316 };
18317 match target {
18318 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18319 Ok(Expression::Function(Box::new(Function::new(
18320 "SIZE".to_string(),
18321 vec![arg],
18322 ))))
18323 }
18324 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18325 Ok(Expression::Cardinality(Box::new(
18326 crate::expressions::UnaryFunc::new(arg),
18327 )))
18328 }
18329 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18330 crate::expressions::UnaryFunc::new(arg),
18331 ))),
18332 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18333 crate::expressions::UnaryFunc::new(arg),
18334 ))),
18335 DialectType::PostgreSQL | DialectType::Redshift => {
18336 // PostgreSQL ARRAY_LENGTH requires dimension arg
18337 Ok(Expression::Function(Box::new(Function::new(
18338 "ARRAY_LENGTH".to_string(),
18339 vec![arg, Expression::number(1)],
18340 ))))
18341 }
18342 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18343 crate::expressions::UnaryFunc::new(arg),
18344 ))),
18345 _ => Ok(e), // Keep original
18346 }
18347 }
18348
18349 Action::JsonExtractToArrow => {
18350 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18351 if let Expression::JsonExtract(mut f) = e {
18352 f.arrow_syntax = true;
18353 // Transform path: convert bracket notation to dot notation
18354 // SQLite strips wildcards, DuckDB preserves them
18355 if let Expression::Literal(Literal::String(ref s)) = f.path {
18356 let mut transformed = s.clone();
18357 if matches!(target, DialectType::SQLite) {
18358 transformed = Self::strip_json_wildcards(&transformed);
18359 }
18360 transformed = Self::bracket_to_dot_notation(&transformed);
18361 if transformed != *s {
18362 f.path = Expression::string(&transformed);
18363 }
18364 }
18365 Ok(Expression::JsonExtract(f))
18366 } else {
18367 Ok(e)
18368 }
18369 }
18370
            Action::JsonExtractToGetJsonObject => {
                // JSON_EXTRACT rewrites: PostgreSQL/Redshift get the path
                // decomposed into individual key arguments; every other target
                // handled by this action gets Hive/Spark GET_JSON_OBJECT.
                if let Expression::JsonExtract(f) = e {
                    if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                        // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                        // Use proper decomposition that handles brackets
                        let keys: Vec<Expression> =
                            if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let parts = Self::decompose_json_path(s);
                                parts.into_iter().map(|k| Expression::string(&k)).collect()
                            } else {
                                // Non-literal path: pass through as a single arg.
                                vec![f.path]
                            };
                        // Redshift's variant returns text rather than json.
                        let func_name = if matches!(target, DialectType::Redshift) {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut args = vec![f.this];
                        args.extend(keys);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            args,
                        ))))
                    } else {
                        // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                        // Convert bracket double quotes to single quotes
                        let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                            let normalized = Self::bracket_to_single_quotes(s);
                            if normalized != *s {
                                Expression::string(&normalized)
                            } else {
                                f.path
                            }
                        } else {
                            f.path
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, path],
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
18416
18417 Action::JsonExtractScalarToGetJsonObject => {
18418 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18419 if let Expression::JsonExtractScalar(f) = e {
18420 Ok(Expression::Function(Box::new(Function::new(
18421 "GET_JSON_OBJECT".to_string(),
18422 vec![f.this, f.path],
18423 ))))
18424 } else {
18425 Ok(e)
18426 }
18427 }
18428
18429 Action::JsonExtractToTsql => {
18430 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18431 let (this, path) = match e {
18432 Expression::JsonExtract(f) => (f.this, f.path),
18433 Expression::JsonExtractScalar(f) => (f.this, f.path),
18434 _ => return Ok(e),
18435 };
18436 // Transform path: strip wildcards, convert bracket notation to dot notation
18437 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18438 {
18439 let stripped = Self::strip_json_wildcards(s);
18440 let dotted = Self::bracket_to_dot_notation(&stripped);
18441 Expression::string(&dotted)
18442 } else {
18443 path
18444 };
18445 let json_query = Expression::Function(Box::new(Function::new(
18446 "JSON_QUERY".to_string(),
18447 vec![this.clone(), transformed_path.clone()],
18448 )));
18449 let json_value = Expression::Function(Box::new(Function::new(
18450 "JSON_VALUE".to_string(),
18451 vec![this, transformed_path],
18452 )));
18453 Ok(Expression::Function(Box::new(Function::new(
18454 "ISNULL".to_string(),
18455 vec![json_query, json_value],
18456 ))))
18457 }
18458
18459 Action::JsonExtractToClickHouse => {
18460 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
18461 let (this, path) = match e {
18462 Expression::JsonExtract(f) => (f.this, f.path),
18463 Expression::JsonExtractScalar(f) => (f.this, f.path),
18464 _ => return Ok(e),
18465 };
18466 let args: Vec<Expression> =
18467 if let Expression::Literal(Literal::String(ref s)) = path {
18468 let parts = Self::decompose_json_path(s);
18469 let mut result = vec![this];
18470 for part in parts {
18471 // ClickHouse uses 1-based integer indices for array access
18472 if let Ok(idx) = part.parse::<i64>() {
18473 result.push(Expression::number(idx + 1));
18474 } else {
18475 result.push(Expression::string(&part));
18476 }
18477 }
18478 result
18479 } else {
18480 vec![this, path]
18481 };
18482 Ok(Expression::Function(Box::new(Function::new(
18483 "JSONExtractString".to_string(),
18484 args,
18485 ))))
18486 }
18487
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific spelling. Unhandled
                // targets keep the JsonExtractScalar node unchanged.
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    // Non-literal path: single pass-through arg.
                                    vec![f.path]
                                };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                            // Snowflake takes the whole path as one string with
                            // the leading '$.' removed.
                            let stripped_path =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                            // Same node kind, but with arrow_syntax switched on
                            // so the generator emits the operator form.
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
18544
18545 Action::JsonPathNormalize => {
18546 // Normalize JSON path format for BigQuery, MySQL, etc.
18547 if let Expression::JsonExtract(mut f) = e {
18548 if let Expression::Literal(Literal::String(ref s)) = f.path {
18549 let mut normalized = s.clone();
18550 // Convert bracket notation and handle wildcards per dialect
18551 match target {
18552 DialectType::BigQuery => {
18553 // BigQuery strips wildcards and uses single quotes in brackets
18554 normalized = Self::strip_json_wildcards(&normalized);
18555 normalized = Self::bracket_to_single_quotes(&normalized);
18556 }
18557 DialectType::MySQL => {
18558 // MySQL preserves wildcards, converts brackets to dot notation
18559 normalized = Self::bracket_to_dot_notation(&normalized);
18560 }
18561 _ => {}
18562 }
18563 if normalized != *s {
18564 f.path = Expression::string(&normalized);
18565 }
18566 }
18567 Ok(Expression::JsonExtract(f))
18568 } else {
18569 Ok(e)
18570 }
18571 }
18572
            Action::JsonQueryValueConvert => {
                // JsonQuery/JsonValue -> target-specific rewrite. `is_query`
                // remembers which node we started from so the default arm can
                // regenerate the matching function name.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...)) — TSQL needs
                        // both calls because JSON_QUERY handles objects/arrays
                        // and JSON_VALUE handles scalars.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax; wrapper/quotes/error options
                        // are carried over onto the JsonExtract node.
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(Literal::String(s)) => {
                                // Only a literal leading "$." is removed; other
                                // path shapes are kept verbatim.
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Literal::String(stripped.to_string()))
                            }
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
18665
18666 Action::JsonLiteralToJsonParse => {
18667 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
18668 if let Expression::Cast(c) = e {
18669 let func_name = if matches!(target, DialectType::Snowflake) {
18670 "PARSE_JSON"
18671 } else {
18672 "JSON_PARSE"
18673 };
18674 Ok(Expression::Function(Box::new(Function::new(
18675 func_name.to_string(),
18676 vec![c.this],
18677 ))))
18678 } else {
18679 Ok(e)
18680 }
18681 }
18682
18683 Action::AtTimeZoneConvert => {
18684 // AT TIME ZONE -> target-specific conversion
18685 if let Expression::AtTimeZone(atz) = e {
18686 match target {
18687 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18688 Ok(Expression::Function(Box::new(Function::new(
18689 "AT_TIMEZONE".to_string(),
18690 vec![atz.this, atz.zone],
18691 ))))
18692 }
18693 DialectType::Spark | DialectType::Databricks => {
18694 Ok(Expression::Function(Box::new(Function::new(
18695 "FROM_UTC_TIMESTAMP".to_string(),
18696 vec![atz.this, atz.zone],
18697 ))))
18698 }
18699 DialectType::Snowflake => {
18700 // CONVERT_TIMEZONE('zone', expr)
18701 Ok(Expression::Function(Box::new(Function::new(
18702 "CONVERT_TIMEZONE".to_string(),
18703 vec![atz.zone, atz.this],
18704 ))))
18705 }
18706 DialectType::BigQuery => {
18707 // TIMESTAMP(DATETIME(expr, 'zone'))
18708 let datetime_call = Expression::Function(Box::new(Function::new(
18709 "DATETIME".to_string(),
18710 vec![atz.this, atz.zone],
18711 )));
18712 Ok(Expression::Function(Box::new(Function::new(
18713 "TIMESTAMP".to_string(),
18714 vec![datetime_call],
18715 ))))
18716 }
18717 _ => Ok(Expression::Function(Box::new(Function::new(
18718 "AT_TIMEZONE".to_string(),
18719 vec![atz.this, atz.zone],
18720 )))),
18721 }
18722 } else {
18723 Ok(e)
18724 }
18725 }
18726
18727 Action::DayOfWeekConvert => {
18728 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
18729 if let Expression::DayOfWeek(f) = e {
18730 match target {
18731 DialectType::DuckDB => Ok(Expression::Function(Box::new(
18732 Function::new("ISODOW".to_string(), vec![f.this]),
18733 ))),
18734 DialectType::Spark | DialectType::Databricks => {
18735 // ((DAYOFWEEK(x) % 7) + 1)
18736 let dayofweek = Expression::Function(Box::new(Function::new(
18737 "DAYOFWEEK".to_string(),
18738 vec![f.this],
18739 )));
18740 let modulo = Expression::Mod(Box::new(BinaryOp {
18741 left: dayofweek,
18742 right: Expression::number(7),
18743 left_comments: Vec::new(),
18744 operator_comments: Vec::new(),
18745 trailing_comments: Vec::new(),
18746 }));
18747 let paren_mod = Expression::Paren(Box::new(Paren {
18748 this: modulo,
18749 trailing_comments: Vec::new(),
18750 }));
18751 let add_one = Expression::Add(Box::new(BinaryOp {
18752 left: paren_mod,
18753 right: Expression::number(1),
18754 left_comments: Vec::new(),
18755 operator_comments: Vec::new(),
18756 trailing_comments: Vec::new(),
18757 }));
18758 Ok(Expression::Paren(Box::new(Paren {
18759 this: add_one,
18760 trailing_comments: Vec::new(),
18761 })))
18762 }
18763 _ => Ok(Expression::DayOfWeek(f)),
18764 }
18765 } else {
18766 Ok(e)
18767 }
18768 }
18769
18770 Action::MaxByMinByConvert => {
18771 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
18772 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
18773 // Handle both Expression::Function and Expression::AggregateFunction
18774 let (is_max, args) = match &e {
18775 Expression::Function(f) => {
18776 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
18777 }
18778 Expression::AggregateFunction(af) => {
18779 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
18780 }
18781 _ => return Ok(e),
18782 };
18783 match target {
18784 DialectType::ClickHouse => {
18785 let name = if is_max { "argMax" } else { "argMin" };
18786 let mut args = args;
18787 args.truncate(2);
18788 Ok(Expression::Function(Box::new(Function::new(
18789 name.to_string(),
18790 args,
18791 ))))
18792 }
18793 DialectType::DuckDB => {
18794 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
18795 Ok(Expression::Function(Box::new(Function::new(
18796 name.to_string(),
18797 args,
18798 ))))
18799 }
18800 DialectType::Spark | DialectType::Databricks => {
18801 let mut args = args;
18802 args.truncate(2);
18803 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
18804 Ok(Expression::Function(Box::new(Function::new(
18805 name.to_string(),
18806 args,
18807 ))))
18808 }
18809 _ => Ok(e),
18810 }
18811 }
18812
18813 Action::ElementAtConvert => {
18814 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
18815 let (arr, idx) = if let Expression::ElementAt(bf) = e {
18816 (bf.this, bf.expression)
18817 } else if let Expression::Function(ref f) = e {
18818 if f.args.len() >= 2 {
18819 if let Expression::Function(f) = e {
18820 let mut args = f.args;
18821 let arr = args.remove(0);
18822 let idx = args.remove(0);
18823 (arr, idx)
18824 } else {
18825 unreachable!("outer condition already matched Expression::Function")
18826 }
18827 } else {
18828 return Ok(e);
18829 }
18830 } else {
18831 return Ok(e);
18832 };
18833 match target {
18834 DialectType::PostgreSQL => {
18835 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
18836 let arr_expr = Expression::Paren(Box::new(Paren {
18837 this: arr,
18838 trailing_comments: vec![],
18839 }));
18840 Ok(Expression::Subscript(Box::new(
18841 crate::expressions::Subscript {
18842 this: arr_expr,
18843 index: idx,
18844 },
18845 )))
18846 }
18847 DialectType::BigQuery => {
18848 // BigQuery: convert ARRAY[...] to bare [...] for subscript
18849 let arr_expr = match arr {
18850 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
18851 crate::expressions::ArrayConstructor {
18852 expressions: af.expressions,
18853 bracket_notation: true,
18854 use_list_keyword: false,
18855 },
18856 )),
18857 other => other,
18858 };
18859 let safe_ordinal = Expression::Function(Box::new(Function::new(
18860 "SAFE_ORDINAL".to_string(),
18861 vec![idx],
18862 )));
18863 Ok(Expression::Subscript(Box::new(
18864 crate::expressions::Subscript {
18865 this: arr_expr,
18866 index: safe_ordinal,
18867 },
18868 )))
18869 }
18870 _ => Ok(Expression::Function(Box::new(Function::new(
18871 "ELEMENT_AT".to_string(),
18872 vec![arr, idx],
18873 )))),
18874 }
18875 }
18876
18877 Action::CurrentUserParens => {
18878 // CURRENT_USER -> CURRENT_USER() for Snowflake
18879 Ok(Expression::Function(Box::new(Function::new(
18880 "CURRENT_USER".to_string(),
18881 vec![],
18882 ))))
18883 }
18884
18885 Action::ArrayAggToCollectList => {
18886 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
18887 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
18888 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
18889 match e {
18890 Expression::AggregateFunction(mut af) => {
18891 let is_simple =
18892 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
18893 let args = if af.args.is_empty() {
18894 vec![]
18895 } else {
18896 vec![af.args[0].clone()]
18897 };
18898 af.name = "COLLECT_LIST".to_string();
18899 af.args = args;
18900 if is_simple {
18901 af.order_by = Vec::new();
18902 }
18903 Ok(Expression::AggregateFunction(af))
18904 }
18905 Expression::ArrayAgg(agg) => {
18906 let is_simple =
18907 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
18908 Ok(Expression::AggregateFunction(Box::new(
18909 crate::expressions::AggregateFunction {
18910 name: "COLLECT_LIST".to_string(),
18911 args: vec![agg.this.clone()],
18912 distinct: agg.distinct,
18913 filter: agg.filter.clone(),
18914 order_by: if is_simple {
18915 Vec::new()
18916 } else {
18917 agg.order_by.clone()
18918 },
18919 limit: agg.limit.clone(),
18920 ignore_nulls: agg.ignore_nulls,
18921 },
18922 )))
18923 }
18924 _ => Ok(e),
18925 }
18926 }
18927
18928 Action::ArraySyntaxConvert => {
18929 match e {
18930 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
18931 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
18932 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
18933 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
18934 expressions: arr.expressions,
18935 bracket_notation: true,
18936 use_list_keyword: false,
18937 })),
18938 ),
18939 // ARRAY(y) function style -> ArrayFunc for target dialect
18940 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
18941 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
18942 let bracket = matches!(
18943 target,
18944 DialectType::BigQuery
18945 | DialectType::DuckDB
18946 | DialectType::ClickHouse
18947 | DialectType::StarRocks
18948 );
18949 Ok(Expression::ArrayFunc(Box::new(
18950 crate::expressions::ArrayConstructor {
18951 expressions: f.args,
18952 bracket_notation: bracket,
18953 use_list_keyword: false,
18954 },
18955 )))
18956 }
18957 _ => Ok(e),
18958 }
18959 }
18960
18961 Action::CastToJsonForSpark => {
18962 // CAST(x AS JSON) -> TO_JSON(x) for Spark
18963 if let Expression::Cast(c) = e {
18964 Ok(Expression::Function(Box::new(Function::new(
18965 "TO_JSON".to_string(),
18966 vec![c.this],
18967 ))))
18968 } else {
18969 Ok(e)
18970 }
18971 }
18972
18973 Action::CastJsonToFromJson => {
18974 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
18975 if let Expression::Cast(c) = e {
18976 // Extract the string literal from ParseJson
18977 let literal_expr = if let Expression::ParseJson(pj) = c.this {
18978 pj.this
18979 } else {
18980 c.this
18981 };
18982 // Convert the target DataType to Spark's type string format
18983 let type_str = Self::data_type_to_spark_string(&c.to);
18984 Ok(Expression::Function(Box::new(Function::new(
18985 "FROM_JSON".to_string(),
18986 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
18987 ))))
18988 } else {
18989 Ok(e)
18990 }
18991 }
18992
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion. Unhandled targets
                // get the ToJson node rebuilt with its original_name cleared.
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
19045
19046 Action::VarianceToClickHouse => {
19047 if let Expression::Variance(f) = e {
19048 Ok(Expression::Function(Box::new(Function::new(
19049 "varSamp".to_string(),
19050 vec![f.this],
19051 ))))
19052 } else {
19053 Ok(e)
19054 }
19055 }
19056
19057 Action::StddevToClickHouse => {
19058 if let Expression::Stddev(f) = e {
19059 Ok(Expression::Function(Box::new(Function::new(
19060 "stddevSamp".to_string(),
19061 vec![f.this],
19062 ))))
19063 } else {
19064 Ok(e)
19065 }
19066 }
19067
19068 Action::ApproxQuantileConvert => {
19069 if let Expression::ApproxQuantile(aq) = e {
19070 let mut args = vec![*aq.this];
19071 if let Some(q) = aq.quantile {
19072 args.push(*q);
19073 }
19074 Ok(Expression::Function(Box::new(Function::new(
19075 "APPROX_PERCENTILE".to_string(),
19076 args,
19077 ))))
19078 } else {
19079 Ok(e)
19080 }
19081 }
19082
19083 Action::DollarParamConvert => {
19084 if let Expression::Parameter(p) = e {
19085 Ok(Expression::Parameter(Box::new(
19086 crate::expressions::Parameter {
19087 name: p.name,
19088 index: p.index,
19089 style: crate::expressions::ParameterStyle::At,
19090 quoted: p.quoted,
19091 string_quoted: p.string_quoted,
19092 expression: p.expression,
19093 },
19094 )))
19095 } else {
19096 Ok(e)
19097 }
19098 }
19099
19100 Action::EscapeStringNormalize => {
19101 if let Expression::Literal(Literal::EscapeString(s)) = e {
19102 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19103 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19104 s[2..].to_string()
19105 } else {
19106 s
19107 };
19108 let normalized = stripped
19109 .replace('\n', "\\n")
19110 .replace('\r', "\\r")
19111 .replace('\t', "\\t");
19112 match target {
19113 DialectType::BigQuery => {
19114 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19115 // Use Raw for the b'...' part to avoid double-escaping
19116 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19117 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19118 }
19119 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19120 }
19121 } else {
19122 Ok(e)
19123 }
19124 }
19125
19126 Action::StraightJoinCase => {
19127 // straight_join: keep lowercase for DuckDB, quote for MySQL
19128 if let Expression::Column(col) = e {
19129 if col.name.name == "STRAIGHT_JOIN" {
19130 let mut new_col = col;
19131 new_col.name.name = "straight_join".to_string();
19132 if matches!(target, DialectType::MySQL) {
19133 // MySQL: needs quoting since it's a reserved keyword
19134 new_col.name.quoted = true;
19135 }
19136 Ok(Expression::Column(new_col))
19137 } else {
19138 Ok(Expression::Column(col))
19139 }
19140 } else {
19141 Ok(e)
19142 }
19143 }
19144
19145 Action::TablesampleReservoir => {
19146 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19147 if let Expression::TableSample(mut ts) = e {
19148 if let Some(ref mut sample) = ts.sample {
19149 sample.method = crate::expressions::SampleMethod::Reservoir;
19150 sample.explicit_method = true;
19151 }
19152 Ok(Expression::TableSample(ts))
19153 } else {
19154 Ok(e)
19155 }
19156 }
19157
19158 Action::TablesampleSnowflakeStrip => {
19159 // Strip method and PERCENT for Snowflake target from non-Snowflake source
19160 match e {
19161 Expression::TableSample(mut ts) => {
19162 if let Some(ref mut sample) = ts.sample {
19163 sample.suppress_method_output = true;
19164 sample.unit_after_size = false;
19165 sample.is_percent = false;
19166 }
19167 Ok(Expression::TableSample(ts))
19168 }
19169 Expression::Table(mut t) => {
19170 if let Some(ref mut sample) = t.table_sample {
19171 sample.suppress_method_output = true;
19172 sample.unit_after_size = false;
19173 sample.is_percent = false;
19174 }
19175 Ok(Expression::Table(t))
19176 }
19177 _ => Ok(e),
19178 }
19179 }
19180
19181 Action::FirstToAnyValue => {
19182 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19183 if let Expression::First(mut agg) = e {
19184 agg.ignore_nulls = None;
19185 agg.name = Some("ANY_VALUE".to_string());
19186 Ok(Expression::AnyValue(agg))
19187 } else {
19188 Ok(e)
19189 }
19190 }
19191
19192 Action::ArrayIndexConvert => {
19193 // Subscript index: 1-based to 0-based for BigQuery
19194 if let Expression::Subscript(mut sub) = e {
19195 if let Expression::Literal(Literal::Number(ref n)) = sub.index {
19196 if let Ok(val) = n.parse::<i64>() {
19197 sub.index =
19198 Expression::Literal(Literal::Number((val - 1).to_string()));
19199 }
19200 }
19201 Ok(Expression::Subscript(sub))
19202 } else {
19203 Ok(e)
19204 }
19205 }
19206
19207 Action::AnyValueIgnoreNulls => {
19208 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19209 if let Expression::AnyValue(mut av) = e {
19210 if av.ignore_nulls.is_none() {
19211 av.ignore_nulls = Some(true);
19212 }
19213 Ok(Expression::AnyValue(av))
19214 } else {
19215 Ok(e)
19216 }
19217 }
19218
19219 Action::BigQueryNullsOrdering => {
19220 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19221 if let Expression::WindowFunction(mut wf) = e {
19222 for o in &mut wf.over.order_by {
19223 o.nulls_first = None;
19224 }
19225 Ok(Expression::WindowFunction(wf))
19226 } else if let Expression::Ordered(mut o) = e {
19227 o.nulls_first = None;
19228 Ok(Expression::Ordered(o))
19229 } else {
19230 Ok(e)
19231 }
19232 }
19233
19234 Action::SnowflakeFloatProtect => {
19235 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19236 // Snowflake's target transform from converting it to DOUBLE.
19237 // Non-Snowflake sources should keep their FLOAT spelling.
19238 if let Expression::DataType(DataType::Float { .. }) = e {
19239 Ok(Expression::DataType(DataType::Custom {
19240 name: "FLOAT".to_string(),
19241 }))
19242 } else {
19243 Ok(e)
19244 }
19245 }
19246
19247 Action::MysqlNullsOrdering => {
19248 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19249 if let Expression::Ordered(mut o) = e {
19250 let nulls_last = o.nulls_first == Some(false);
19251 let desc = o.desc;
19252 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19253 // If requested ordering matches default, just strip NULLS clause
19254 let matches_default = if desc {
19255 // DESC default is NULLS FIRST, so nulls_first=true matches
19256 o.nulls_first == Some(true)
19257 } else {
19258 // ASC default is NULLS LAST, so nulls_first=false matches
19259 nulls_last
19260 };
19261 if matches_default {
19262 o.nulls_first = None;
19263 Ok(Expression::Ordered(o))
19264 } else {
19265 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19266 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19267 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19268 let null_val = if desc { 1 } else { 0 };
19269 let non_null_val = if desc { 0 } else { 1 };
19270 let _case_expr = Expression::Case(Box::new(Case {
19271 operand: None,
19272 whens: vec![(
19273 Expression::IsNull(Box::new(crate::expressions::IsNull {
19274 this: o.this.clone(),
19275 not: false,
19276 postfix_form: false,
19277 })),
19278 Expression::number(null_val),
19279 )],
19280 else_: Some(Expression::number(non_null_val)),
19281 comments: Vec::new(),
19282 }));
19283 o.nulls_first = None;
19284 // Return a tuple of [case_expr, ordered_expr]
19285 // We need to return both as part of the ORDER BY
19286 // But since transform_recursive processes individual expressions,
19287 // we can't easily add extra ORDER BY items here.
19288 // Instead, strip the nulls_first
19289 o.nulls_first = None;
19290 Ok(Expression::Ordered(o))
19291 }
19292 } else {
19293 Ok(e)
19294 }
19295 }
19296
                Action::MysqlNullsLastRewrite => {
                    // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                    // to simulate NULLS LAST for ASC ordering.
                    // In both branches nulls_first is cleared because MySQL has no
                    // NULLS FIRST/LAST clause at all.
                    if let Expression::WindowFunction(mut wf) = e {
                        let mut new_order_by = Vec::new();
                        for o in wf.over.order_by {
                            if !o.desc {
                                // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                                // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                                // so NULL rows sort after non-NULL rows.
                                let case_expr = Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(
                                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                                            this: o.this.clone(),
                                            not: false,
                                            postfix_form: false,
                                        })),
                                        Expression::Literal(Literal::Number("1".to_string())),
                                    )],
                                    else_: Some(Expression::Literal(Literal::Number(
                                        "0".to_string(),
                                    ))),
                                    comments: Vec::new(),
                                }));
                                // The synthetic key is inserted ahead of the original
                                // ordering expression.
                                new_order_by.push(crate::expressions::Ordered {
                                    this: case_expr,
                                    desc: false,
                                    nulls_first: None,
                                    explicit_asc: false,
                                    with_fill: None,
                                });
                                let mut ordered = o;
                                ordered.nulls_first = None;
                                new_order_by.push(ordered);
                            } else {
                                // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                                // No change needed
                                let mut ordered = o;
                                ordered.nulls_first = None;
                                new_order_by.push(ordered);
                            }
                        }
                        wf.over.order_by = new_order_by;
                        Ok(Expression::WindowFunction(wf))
                    } else {
                        Ok(e)
                    }
                }
19345
19346 Action::RespectNullsConvert => {
19347 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
19348 if let Expression::WindowFunction(mut wf) = e {
19349 match &mut wf.this {
19350 Expression::FirstValue(ref mut vf) => {
19351 if vf.ignore_nulls == Some(false) {
19352 vf.ignore_nulls = None;
19353 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
19354 // but that's handled by the generator's NULLS ordering
19355 }
19356 }
19357 Expression::LastValue(ref mut vf) => {
19358 if vf.ignore_nulls == Some(false) {
19359 vf.ignore_nulls = None;
19360 }
19361 }
19362 _ => {}
19363 }
19364 Ok(Expression::WindowFunction(wf))
19365 } else {
19366 Ok(e)
19367 }
19368 }
19369
19370 Action::CreateTableStripComment => {
19371 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
19372 if let Expression::CreateTable(mut ct) = e {
19373 for col in &mut ct.columns {
19374 col.comment = None;
19375 col.constraints.retain(|c| {
19376 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
19377 });
19378 // Also remove Comment from constraint_order
19379 col.constraint_order.retain(|c| {
19380 !matches!(c, crate::expressions::ConstraintType::Comment)
19381 });
19382 }
19383 // Strip properties (USING, PARTITIONED BY, etc.)
19384 ct.properties.clear();
19385 Ok(Expression::CreateTable(ct))
19386 } else {
19387 Ok(e)
19388 }
19389 }
19390
19391 Action::AlterTableToSpRename => {
19392 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19393 if let Expression::AlterTable(ref at) = e {
19394 if let Some(crate::expressions::AlterTableAction::RenameTable(
19395 ref new_tbl,
19396 )) = at.actions.first()
19397 {
19398 // Build the old table name using TSQL bracket quoting
19399 let old_name = if let Some(ref schema) = at.name.schema {
19400 if at.name.name.quoted || schema.quoted {
19401 format!("[{}].[{}]", schema.name, at.name.name.name)
19402 } else {
19403 format!("{}.{}", schema.name, at.name.name.name)
19404 }
19405 } else {
19406 if at.name.name.quoted {
19407 format!("[{}]", at.name.name.name)
19408 } else {
19409 at.name.name.name.clone()
19410 }
19411 };
19412 let new_name = new_tbl.name.name.clone();
19413 // EXEC sp_rename 'old_name', 'new_name'
19414 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19415 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19416 } else {
19417 Ok(e)
19418 }
19419 } else {
19420 Ok(e)
19421 }
19422 }
19423
                Action::SnowflakeIntervalFormat => {
                    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake:
                    // the unit is folded into the quoted literal and the separate
                    // unit spec is removed.
                    if let Expression::Interval(mut iv) = e {
                        if let (
                            Some(Expression::Literal(Literal::String(ref val))),
                            Some(ref unit_spec),
                        ) = (&iv.this, &iv.unit)
                        {
                            // Only simple single-unit specs are folded; compound specs
                            // (the `_` arm) map to "" and leave the interval untouched.
                            let unit_str = match unit_spec {
                                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                    match unit {
                                        crate::expressions::IntervalUnit::Year => "YEAR",
                                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                        crate::expressions::IntervalUnit::Month => "MONTH",
                                        crate::expressions::IntervalUnit::Week => "WEEK",
                                        crate::expressions::IntervalUnit::Day => "DAY",
                                        crate::expressions::IntervalUnit::Hour => "HOUR",
                                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                                        crate::expressions::IntervalUnit::Second => "SECOND",
                                        crate::expressions::IntervalUnit::Millisecond => {
                                            "MILLISECOND"
                                        }
                                        crate::expressions::IntervalUnit::Microsecond => {
                                            "MICROSECOND"
                                        }
                                        crate::expressions::IntervalUnit::Nanosecond => {
                                            "NANOSECOND"
                                        }
                                    }
                                }
                                _ => "",
                            };
                            if !unit_str.is_empty() {
                                // Merge value and unit into one literal: '2' + HOUR ->
                                // '2 HOUR', then drop the standalone unit.
                                let combined = format!("{} {}", val, unit_str);
                                iv.this = Some(Expression::Literal(Literal::String(combined)));
                                iv.unit = None;
                            }
                        }
                        Ok(Expression::Interval(iv))
                    } else {
                        Ok(e)
                    }
                }
19467
19468 Action::ArrayConcatBracketConvert => {
19469 // Expression::Array/ArrayFunc -> target-specific
19470 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
19471 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
19472 match e {
19473 Expression::Array(arr) => {
19474 if matches!(target, DialectType::Redshift) {
19475 Ok(Expression::Function(Box::new(Function::new(
19476 "ARRAY".to_string(),
19477 arr.expressions,
19478 ))))
19479 } else {
19480 Ok(Expression::ArrayFunc(Box::new(
19481 crate::expressions::ArrayConstructor {
19482 expressions: arr.expressions,
19483 bracket_notation: false,
19484 use_list_keyword: false,
19485 },
19486 )))
19487 }
19488 }
19489 Expression::ArrayFunc(arr) => {
19490 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
19491 if matches!(target, DialectType::Redshift) {
19492 Ok(Expression::Function(Box::new(Function::new(
19493 "ARRAY".to_string(),
19494 arr.expressions,
19495 ))))
19496 } else {
19497 Ok(Expression::ArrayFunc(arr))
19498 }
19499 }
19500 _ => Ok(e),
19501 }
19502 }
19503
19504 Action::BitAggFloatCast => {
19505 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
19506 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19507 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19508 let int_type = DataType::Int {
19509 length: None,
19510 integer_spelling: false,
19511 };
19512 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
19513 if let Expression::Cast(c) = agg_this {
19514 match &c.to {
19515 DataType::Float { .. }
19516 | DataType::Double { .. }
19517 | DataType::Custom { .. } => {
19518 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19519 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
19520 let inner_type = match &c.to {
19521 DataType::Float {
19522 precision, scale, ..
19523 } => DataType::Float {
19524 precision: *precision,
19525 scale: *scale,
19526 real_spelling: true,
19527 },
19528 other => other.clone(),
19529 };
19530 let inner_cast =
19531 Expression::Cast(Box::new(crate::expressions::Cast {
19532 this: c.this.clone(),
19533 to: inner_type,
19534 trailing_comments: Vec::new(),
19535 double_colon_syntax: false,
19536 format: None,
19537 default: None,
19538 }));
19539 let rounded = Expression::Function(Box::new(Function::new(
19540 "ROUND".to_string(),
19541 vec![inner_cast],
19542 )));
19543 Expression::Cast(Box::new(crate::expressions::Cast {
19544 this: rounded,
19545 to: int_dt,
19546 trailing_comments: Vec::new(),
19547 double_colon_syntax: false,
19548 format: None,
19549 default: None,
19550 }))
19551 }
19552 DataType::Decimal { .. } => {
19553 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19554 Expression::Cast(Box::new(crate::expressions::Cast {
19555 this: Expression::Cast(c),
19556 to: int_dt,
19557 trailing_comments: Vec::new(),
19558 double_colon_syntax: false,
19559 format: None,
19560 default: None,
19561 }))
19562 }
19563 _ => Expression::Cast(c),
19564 }
19565 } else {
19566 agg_this
19567 }
19568 };
19569 match e {
19570 Expression::BitwiseOrAgg(mut f) => {
19571 f.this = wrap_agg(f.this, int_type);
19572 Ok(Expression::BitwiseOrAgg(f))
19573 }
19574 Expression::BitwiseAndAgg(mut f) => {
19575 let int_type = DataType::Int {
19576 length: None,
19577 integer_spelling: false,
19578 };
19579 f.this = wrap_agg(f.this, int_type);
19580 Ok(Expression::BitwiseAndAgg(f))
19581 }
19582 Expression::BitwiseXorAgg(mut f) => {
19583 let int_type = DataType::Int {
19584 length: None,
19585 integer_spelling: false,
19586 };
19587 f.this = wrap_agg(f.this, int_type);
19588 Ok(Expression::BitwiseXorAgg(f))
19589 }
19590 _ => Ok(e),
19591 }
19592 }
19593
19594 Action::BitAggSnowflakeRename => {
19595 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
19596 match e {
19597 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
19598 Function::new("BITORAGG".to_string(), vec![f.this]),
19599 ))),
19600 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
19601 Function::new("BITANDAGG".to_string(), vec![f.this]),
19602 ))),
19603 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
19604 Function::new("BITXORAGG".to_string(), vec![f.this]),
19605 ))),
19606 _ => Ok(e),
19607 }
19608 }
19609
19610 Action::StrftimeCastTimestamp => {
19611 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
19612 if let Expression::Cast(mut c) = e {
19613 if matches!(
19614 c.to,
19615 DataType::Timestamp {
19616 timezone: false,
19617 ..
19618 }
19619 ) {
19620 c.to = DataType::Custom {
19621 name: "TIMESTAMP_NTZ".to_string(),
19622 };
19623 }
19624 Ok(Expression::Cast(c))
19625 } else {
19626 Ok(e)
19627 }
19628 }
19629
19630 Action::DecimalDefaultPrecision => {
19631 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
19632 if let Expression::Cast(mut c) = e {
19633 if matches!(
19634 c.to,
19635 DataType::Decimal {
19636 precision: None,
19637 ..
19638 }
19639 ) {
19640 c.to = DataType::Decimal {
19641 precision: Some(18),
19642 scale: Some(3),
19643 };
19644 }
19645 Ok(Expression::Cast(c))
19646 } else {
19647 Ok(e)
19648 }
19649 }
19650
                Action::FilterToIff => {
                    // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
                    if let Expression::Filter(f) = e {
                        let condition = *f.expression;
                        let agg = *f.this;
                        // Strip WHERE from condition
                        let cond = match condition {
                            Expression::Where(w) => w.this,
                            other => other,
                        };
                        // Extract the aggregate function and its argument
                        // We want AVG(IFF(condition, x, NULL))
                        match agg {
                            Expression::Function(mut func) => {
                                if !func.args.is_empty() {
                                    // Wrap the first argument; remaining args are kept.
                                    let orig_arg = func.args[0].clone();
                                    let iff_call = Expression::Function(Box::new(Function::new(
                                        "IFF".to_string(),
                                        vec![cond, orig_arg, Expression::Null(Null)],
                                    )));
                                    func.args[0] = iff_call;
                                    Ok(Expression::Function(func))
                                } else {
                                    // Zero-arg function: keep the FILTER wrapper as-is.
                                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                        this: Box::new(Expression::Function(func)),
                                        expression: Box::new(cond),
                                    })))
                                }
                            }
                            Expression::Avg(mut avg) => {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                                )));
                                avg.this = iff_call;
                                Ok(Expression::Avg(avg))
                            }
                            Expression::Sum(mut s) => {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, s.this.clone(), Expression::Null(Null)],
                                )));
                                s.this = iff_call;
                                Ok(Expression::Sum(s))
                            }
                            Expression::Count(mut c) => {
                                // NOTE(review): for COUNT(*) (c.this == None) the filter
                                // condition is dropped entirely here — confirm upstream
                                // never produces a filtered COUNT(*) on this path.
                                if let Some(ref this_expr) = c.this {
                                    let iff_call = Expression::Function(Box::new(Function::new(
                                        "IFF".to_string(),
                                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                                    )));
                                    c.this = Some(iff_call);
                                }
                                Ok(Expression::Count(c))
                            }
                            other => {
                                // Fallback: keep as Filter
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(other),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                    } else {
                        Ok(e)
                    }
                }
19718
                Action::AggFilterToIff => {
                    // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                    // Helper macro to handle the common AggFunc case: take the filter
                    // out of the aggregate and wrap its argument in IFF instead.
                    macro_rules! handle_agg_filter_to_iff {
                        ($variant:ident, $agg:expr) => {{
                            let mut agg = $agg;
                            if let Some(filter_cond) = agg.filter.take() {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                                )));
                                agg.this = iff_call;
                            }
                            Ok(Expression::$variant(agg))
                        }};
                    }

                    match e {
                        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                        Expression::ApproxDistinct(agg) => {
                            handle_agg_filter_to_iff!(ApproxDistinct, agg)
                        }
                        // Count has an optional argument, so it can't use the macro.
                        Expression::Count(mut c) => {
                            // NOTE(review): for COUNT(*) (c.this == None) the filter is
                            // removed without being re-applied — confirm this path never
                            // receives a filtered COUNT(*).
                            if let Some(filter_cond) = c.filter.take() {
                                if let Some(ref this_expr) = c.this {
                                    let iff_call = Expression::Function(Box::new(Function::new(
                                        "IFF".to_string(),
                                        vec![
                                            filter_cond,
                                            this_expr.clone(),
                                            Expression::Null(Null),
                                        ],
                                    )));
                                    c.this = Some(iff_call);
                                }
                            }
                            Ok(Expression::Count(c))
                        }
                        other => Ok(other),
                    }
                }
19776
                Action::JsonToGetPath => {
                    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                    if let Expression::JsonExtract(je) = e {
                        // Convert to PARSE_JSON() wrapper:
                        // - JSON(x) -> PARSE_JSON(x)
                        // - PARSE_JSON(x) -> keep as-is
                        // - anything else -> wrap in PARSE_JSON()
                        let this = match &je.this {
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                            {
                                Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    f.args.clone(),
                                )))
                            }
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                je.this.clone()
                            }
                            // GET_PATH result is already JSON, don't wrap
                            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                                je.this.clone()
                            }
                            other => {
                                // Wrap non-JSON expressions in PARSE_JSON()
                                Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    vec![other.clone()],
                                )))
                            }
                        };
                        // Convert path: extract key from JSONPath or strip $. prefix from string
                        let path = match &je.path {
                            Expression::JSONPath(jp) => {
                                // Extract the key from JSONPath: $root.key -> 'key'.
                                // Only literal string keys are collected; root markers
                                // and any other path segment kinds are skipped.
                                let mut key_parts = Vec::new();
                                for expr in &jp.expressions {
                                    match expr {
                                        Expression::JSONPathRoot(_) => {} // skip root
                                        Expression::JSONPathKey(k) => {
                                            if let Expression::Literal(Literal::String(s)) =
                                                &*k.this
                                            {
                                                key_parts.push(s.clone());
                                            }
                                        }
                                        _ => {}
                                    }
                                }
                                // If nothing usable was found, keep the original path
                                // untouched rather than emitting an empty key.
                                if !key_parts.is_empty() {
                                    Expression::Literal(Literal::String(key_parts.join(".")))
                                } else {
                                    je.path.clone()
                                }
                            }
                            Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                                Expression::Literal(Literal::String(stripped))
                            }
                            Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                                Expression::Literal(Literal::String(stripped))
                            }
                            _ => je.path.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![this, path],
                        ))))
                    } else {
                        Ok(e)
                    }
                }
19852
19853 Action::StructToRow => {
19854 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
19855 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
19856
19857 // Extract key-value pairs from either Struct or MapFunc
19858 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
19859 Expression::Struct(s) => Some(
19860 s.fields
19861 .iter()
19862 .map(|(opt_name, field_expr)| {
19863 if let Some(name) = opt_name {
19864 (name.clone(), field_expr.clone())
19865 } else if let Expression::NamedArgument(na) = field_expr {
19866 (na.name.name.clone(), na.value.clone())
19867 } else {
19868 (String::new(), field_expr.clone())
19869 }
19870 })
19871 .collect(),
19872 ),
19873 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
19874 m.keys
19875 .iter()
19876 .zip(m.values.iter())
19877 .map(|(key, value)| {
19878 let key_name = match key {
19879 Expression::Literal(Literal::String(s)) => s.clone(),
19880 Expression::Identifier(id) => id.name.clone(),
19881 _ => String::new(),
19882 };
19883 (key_name, value.clone())
19884 })
19885 .collect(),
19886 ),
19887 _ => None,
19888 };
19889
19890 if let Some(pairs) = kv_pairs {
19891 let mut named_args = Vec::new();
19892 for (key_name, value) in pairs {
19893 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
19894 named_args.push(Expression::Alias(Box::new(
19895 crate::expressions::Alias::new(
19896 value,
19897 Identifier::new(key_name),
19898 ),
19899 )));
19900 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
19901 named_args.push(value);
19902 } else {
19903 named_args.push(value);
19904 }
19905 }
19906
19907 if matches!(target, DialectType::BigQuery) {
19908 Ok(Expression::Function(Box::new(Function::new(
19909 "STRUCT".to_string(),
19910 named_args,
19911 ))))
19912 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
19913 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
19914 let row_func = Expression::Function(Box::new(Function::new(
19915 "ROW".to_string(),
19916 named_args,
19917 )));
19918
19919 // Try to infer types for each pair
19920 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
19921 Expression::Struct(s) => Some(
19922 s.fields
19923 .iter()
19924 .map(|(opt_name, field_expr)| {
19925 if let Some(name) = opt_name {
19926 (name.clone(), field_expr.clone())
19927 } else if let Expression::NamedArgument(na) = field_expr
19928 {
19929 (na.name.name.clone(), na.value.clone())
19930 } else {
19931 (String::new(), field_expr.clone())
19932 }
19933 })
19934 .collect(),
19935 ),
19936 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
19937 m.keys
19938 .iter()
19939 .zip(m.values.iter())
19940 .map(|(key, value)| {
19941 let key_name = match key {
19942 Expression::Literal(Literal::String(s)) => {
19943 s.clone()
19944 }
19945 Expression::Identifier(id) => id.name.clone(),
19946 _ => String::new(),
19947 };
19948 (key_name, value.clone())
19949 })
19950 .collect(),
19951 ),
19952 _ => None,
19953 };
19954
19955 if let Some(pairs) = kv_pairs_again {
19956 // Infer types for all values
19957 let mut all_inferred = true;
19958 let mut fields = Vec::new();
19959 for (name, value) in &pairs {
19960 let inferred_type = match value {
19961 Expression::Literal(Literal::Number(n)) => {
19962 if n.contains('.') {
19963 Some(DataType::Double {
19964 precision: None,
19965 scale: None,
19966 })
19967 } else {
19968 Some(DataType::Int {
19969 length: None,
19970 integer_spelling: true,
19971 })
19972 }
19973 }
19974 Expression::Literal(Literal::String(_)) => {
19975 Some(DataType::VarChar {
19976 length: None,
19977 parenthesized_length: false,
19978 })
19979 }
19980 Expression::Boolean(_) => Some(DataType::Boolean),
19981 _ => None,
19982 };
19983 if let Some(dt) = inferred_type {
19984 fields.push(crate::expressions::StructField::new(
19985 name.clone(),
19986 dt,
19987 ));
19988 } else {
19989 all_inferred = false;
19990 break;
19991 }
19992 }
19993
19994 if all_inferred && !fields.is_empty() {
19995 let row_type = DataType::Struct {
19996 fields,
19997 nested: true,
19998 };
19999 Ok(Expression::Cast(Box::new(Cast {
20000 this: row_func,
20001 to: row_type,
20002 trailing_comments: Vec::new(),
20003 double_colon_syntax: false,
20004 format: None,
20005 default: None,
20006 })))
20007 } else {
20008 Ok(row_func)
20009 }
20010 } else {
20011 Ok(row_func)
20012 }
20013 } else {
20014 Ok(Expression::Function(Box::new(Function::new(
20015 "ROW".to_string(),
20016 named_args,
20017 ))))
20018 }
20019 } else {
20020 Ok(e)
20021 }
20022 }
20023
                Action::SparkStructConvert => {
                    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                    // or DuckDB {'name': val, ...}
                    if let Expression::Function(f) = e {
                        // Extract name-value pairs from aliased args; a non-aliased
                        // argument yields an empty field name.
                        let mut pairs: Vec<(String, Expression)> = Vec::new();
                        for arg in &f.args {
                            match arg {
                                Expression::Alias(a) => {
                                    pairs.push((a.alias.name.clone(), a.this.clone()));
                                }
                                _ => {
                                    pairs.push((String::new(), arg.clone()));
                                }
                            }
                        }

                        match target {
                            DialectType::DuckDB => {
                                // Convert to DuckDB struct literal {'name': value, ...}
                                let mut keys = Vec::new();
                                let mut values = Vec::new();
                                for (name, value) in &pairs {
                                    keys.push(Expression::Literal(Literal::String(name.clone())));
                                    values.push(value.clone());
                                }
                                Ok(Expression::MapFunc(Box::new(
                                    crate::expressions::MapConstructor {
                                        keys,
                                        values,
                                        curly_brace_syntax: true,
                                        with_map_keyword: false,
                                    },
                                )))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                                let row_args: Vec<Expression> =
                                    pairs.iter().map(|(_, v)| v.clone()).collect();
                                let row_func = Expression::Function(Box::new(Function::new(
                                    "ROW".to_string(),
                                    row_args,
                                )));

                                // Infer types: only simple literals are typeable; any
                                // other value aborts the inference and the bare ROW(...)
                                // call is emitted instead of the CAST wrapper.
                                let mut all_inferred = true;
                                let mut fields = Vec::new();
                                for (name, value) in &pairs {
                                    let inferred_type = match value {
                                        Expression::Literal(Literal::Number(n)) => {
                                            if n.contains('.') {
                                                Some(DataType::Double {
                                                    precision: None,
                                                    scale: None,
                                                })
                                            } else {
                                                Some(DataType::Int {
                                                    length: None,
                                                    integer_spelling: true,
                                                })
                                            }
                                        }
                                        Expression::Literal(Literal::String(_)) => {
                                            Some(DataType::VarChar {
                                                length: None,
                                                parenthesized_length: false,
                                            })
                                        }
                                        Expression::Boolean(_) => Some(DataType::Boolean),
                                        _ => None,
                                    };
                                    if let Some(dt) = inferred_type {
                                        fields.push(crate::expressions::StructField::new(
                                            name.clone(),
                                            dt,
                                        ));
                                    } else {
                                        all_inferred = false;
                                        break;
                                    }
                                }

                                if all_inferred && !fields.is_empty() {
                                    let row_type = DataType::Struct {
                                        fields,
                                        nested: true,
                                    };
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: row_func,
                                        to: row_type,
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    })))
                                } else {
                                    Ok(row_func)
                                }
                            }
                            // Other targets keep the STRUCT(...) call untouched.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
20129
20130 Action::ApproxCountDistinctToApproxDistinct => {
20131 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20132 if let Expression::ApproxCountDistinct(f) = e {
20133 Ok(Expression::ApproxDistinct(f))
20134 } else {
20135 Ok(e)
20136 }
20137 }
20138
20139 Action::CollectListToArrayAgg => {
20140 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
20141 if let Expression::AggregateFunction(f) = e {
20142 let filter_expr = if !f.args.is_empty() {
20143 let arg = f.args[0].clone();
20144 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
20145 this: arg,
20146 not: true,
20147 postfix_form: false,
20148 })))
20149 } else {
20150 None
20151 };
20152 let agg = crate::expressions::AggFunc {
20153 this: if f.args.is_empty() {
20154 Expression::Null(crate::expressions::Null)
20155 } else {
20156 f.args[0].clone()
20157 },
20158 distinct: f.distinct,
20159 order_by: f.order_by.clone(),
20160 filter: filter_expr,
20161 ignore_nulls: None,
20162 name: None,
20163 having_max: None,
20164 limit: None,
20165 };
20166 Ok(Expression::ArrayAgg(Box::new(agg)))
20167 } else {
20168 Ok(e)
20169 }
20170 }
20171
Action::CollectSetConvert => {
    // COLLECT_SET(x) -> target-specific distinct-array aggregate.
    if let Expression::AggregateFunction(f) = e {
        match target {
            // Presto: rename to SET_AGG, keeping all aggregate modifiers.
            DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "SET_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                },
            ))),
            // Snowflake: rename to ARRAY_UNIQUE_AGG, same structure.
            DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "ARRAY_UNIQUE_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                },
            ))),
            // Trino/DuckDB: ARRAY_AGG(DISTINCT x). Only the first argument is
            // carried over; ORDER BY / FILTER / LIMIT on the source aggregate
            // are dropped here.
            DialectType::Trino | DialectType::DuckDB => {
                let agg = crate::expressions::AggFunc {
                    this: if f.args.is_empty() {
                        // Defensive: no arguments -> aggregate over NULL.
                        Expression::Null(crate::expressions::Null)
                    } else {
                        f.args[0].clone()
                    },
                    distinct: true,
                    order_by: Vec::new(),
                    filter: None,
                    ignore_nulls: None,
                    name: None,
                    having_max: None,
                    limit: None,
                };
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
            // Other targets: leave COLLECT_SET untouched.
            _ => Ok(Expression::AggregateFunction(f)),
        }
    } else {
        // Action applied to a non-aggregate node: pass through unchanged.
        Ok(e)
    }
}
20221
Action::PercentileConvert => {
    // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
    // Only the function name changes; arguments and aggregate modifiers
    // (DISTINCT, ORDER BY, FILTER, LIMIT, IGNORE NULLS) are carried over.
    if let Expression::AggregateFunction(f) = e {
        let name = match target {
            DialectType::DuckDB => "QUANTILE",
            DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
            // All other targets keep the original spelling.
            _ => "PERCENTILE",
        };
        Ok(Expression::AggregateFunction(Box::new(
            crate::expressions::AggregateFunction {
                name: name.to_string(),
                args: f.args,
                distinct: f.distinct,
                order_by: f.order_by,
                filter: f.filter,
                limit: f.limit,
                ignore_nulls: f.ignore_nulls,
            },
        )))
    } else {
        // Not an aggregate node: nothing to rewrite.
        Ok(e)
    }
}
20245
20246 Action::CorrIsnanWrap => {
20247 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20248 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20249 let corr_clone = e.clone();
20250 let isnan = Expression::Function(Box::new(Function::new(
20251 "ISNAN".to_string(),
20252 vec![corr_clone.clone()],
20253 )));
20254 let case_expr = Expression::Case(Box::new(Case {
20255 operand: None,
20256 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20257 else_: Some(corr_clone),
20258 comments: Vec::new(),
20259 }));
20260 Ok(case_expr)
20261 }
20262
Action::TruncToDateTrunc => {
    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
    // Note the argument order flips: TRUNC takes (value, unit) while
    // DATE_TRUNC takes (unit, value).
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let timestamp = f.args[0].clone();
            let unit_expr = f.args[1].clone();

            if matches!(target, DialectType::ClickHouse) {
                // For ClickHouse, produce Expression::DateTrunc which the generator
                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                // target transform that would convert it to dateTrunc
                let unit_str = Self::get_unit_str_static(&unit_expr);
                let dt_field = match unit_str.as_str() {
                    "YEAR" => DateTimeField::Year,
                    "MONTH" => DateTimeField::Month,
                    "DAY" => DateTimeField::Day,
                    "HOUR" => DateTimeField::Hour,
                    "MINUTE" => DateTimeField::Minute,
                    "SECOND" => DateTimeField::Second,
                    "WEEK" => DateTimeField::Week,
                    "QUARTER" => DateTimeField::Quarter,
                    // Unrecognized units are passed through verbatim.
                    _ => DateTimeField::Custom(unit_str),
                };
                Ok(Expression::DateTrunc(Box::new(
                    crate::expressions::DateTruncFunc {
                        this: timestamp,
                        unit: dt_field,
                    },
                )))
            } else {
                // Other targets: plain DATE_TRUNC(unit, timestamp) call.
                let new_args = vec![unit_expr, timestamp];
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    new_args,
                ))))
            }
        } else {
            // TRUNC with any other arity (e.g. numeric TRUNC(x)) is left alone.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
20306
Action::ArrayContainsConvert => {
    // ARRAY_CONTAINS(arr, val) -> target-specific membership test.
    if let Expression::ArrayContains(f) = e {
        match target {
            DialectType::Presto | DialectType::Trino => {
                // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![f.this, f.expression],
                ))))
            }
            DialectType::Snowflake => {
                // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                // Snowflake takes the needle first and requires it as VARIANT.
                let cast_val =
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: f.expression,
                        to: crate::expressions::DataType::Custom {
                            name: "VARIANT".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONTAINS".to_string(),
                    vec![cast_val, f.this],
                ))))
            }
            // Other targets keep the dedicated ArrayContains node.
            _ => Ok(Expression::ArrayContains(f)),
        }
    } else {
        Ok(e)
    }
}
20341
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // Missing substring degrades to a NULL literal (the whole
        // expression then evaluates to NULL at runtime).
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Default start position is 1 (SQL string indexing is 1-based).
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1  (re-bases the match index to the original string)
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0  (substring not found past `pos`)
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        // NOTE(review): `occurrence` is only preserved by the fallback
        // reconstruction below; the Presto/Trino/Athena and DuckDB
        // expansions drop it — confirm occurrence > 1 is handled upstream.
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                })))
            }
            _ => {
                // Reconstruct StrPosition
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
20422
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) -> target-specific month difference.
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; emulate the fractional form:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //     THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. whole months plus a day-based fraction, except when
                // both dates fall on the last day of their months.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                // Parenthesize the subtraction so the division binds correctly.
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                // 31.0 mirrors the fixed 31-day month fraction used by the
                // source dialect's MONTHS_BETWEEN.
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — whole months only; the
                // fractional part of MONTHS_BETWEEN is not preserved here.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — also whole months only.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Other targets keep the dedicated MonthsBetween node.
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
20520
20521 Action::AddMonthsConvert => {
20522 if let Expression::AddMonths(am) = e {
20523 let date = am.this;
20524 let val = am.expression;
20525 match target {
20526 DialectType::TSQL | DialectType::Fabric => {
20527 let cast_date = Self::ensure_cast_datetime2(date);
20528 Ok(Expression::Function(Box::new(Function::new(
20529 "DATEADD".to_string(),
20530 vec![
20531 Expression::Identifier(Identifier::new("MONTH")),
20532 val,
20533 cast_date,
20534 ],
20535 ))))
20536 }
20537 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
20538 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20539 // Optionally wrapped in CAST(... AS type) if the input had a specific type
20540
20541 // Determine the cast type from the date expression
20542 let (cast_date, return_type) = match &date {
20543 Expression::Literal(Literal::String(_)) => {
20544 // String literal: CAST(str AS TIMESTAMP), no outer CAST
20545 (
20546 Expression::Cast(Box::new(Cast {
20547 this: date.clone(),
20548 to: DataType::Timestamp {
20549 precision: None,
20550 timezone: false,
20551 },
20552 trailing_comments: Vec::new(),
20553 double_colon_syntax: false,
20554 format: None,
20555 default: None,
20556 })),
20557 None,
20558 )
20559 }
20560 Expression::Cast(c) => {
20561 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
20562 (date.clone(), Some(c.to.clone()))
20563 }
20564 _ => {
20565 // Expression or NULL::TYPE - keep as-is, check for cast type
20566 if let Expression::Cast(c) = &date {
20567 (date.clone(), Some(c.to.clone()))
20568 } else {
20569 (date.clone(), None)
20570 }
20571 }
20572 };
20573
20574 // Build the interval expression
20575 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
20576 // For integer values, use INTERVAL val MONTH
20577 let is_non_integer_val = match &val {
20578 Expression::Literal(Literal::Number(n)) => n.contains('.'),
20579 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
20580 Expression::Neg(n) => {
20581 if let Expression::Literal(Literal::Number(s)) = &n.this {
20582 s.contains('.')
20583 } else {
20584 false
20585 }
20586 }
20587 _ => false,
20588 };
20589
20590 let add_interval = if is_non_integer_val {
20591 // TO_MONTHS(CAST(ROUND(val) AS INT))
20592 let round_val = Expression::Function(Box::new(Function::new(
20593 "ROUND".to_string(),
20594 vec![val.clone()],
20595 )));
20596 let cast_int = Expression::Cast(Box::new(Cast {
20597 this: round_val,
20598 to: DataType::Int {
20599 length: None,
20600 integer_spelling: false,
20601 },
20602 trailing_comments: Vec::new(),
20603 double_colon_syntax: false,
20604 format: None,
20605 default: None,
20606 }));
20607 Expression::Function(Box::new(Function::new(
20608 "TO_MONTHS".to_string(),
20609 vec![cast_int],
20610 )))
20611 } else {
20612 // INTERVAL val MONTH
20613 // For negative numbers, wrap in parens
20614 let interval_val = match &val {
20615 Expression::Literal(Literal::Number(n))
20616 if n.starts_with('-') =>
20617 {
20618 Expression::Paren(Box::new(Paren {
20619 this: val.clone(),
20620 trailing_comments: Vec::new(),
20621 }))
20622 }
20623 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
20624 this: val.clone(),
20625 trailing_comments: Vec::new(),
20626 })),
20627 Expression::Null(_) => Expression::Paren(Box::new(Paren {
20628 this: val.clone(),
20629 trailing_comments: Vec::new(),
20630 })),
20631 _ => val.clone(),
20632 };
20633 Expression::Interval(Box::new(crate::expressions::Interval {
20634 this: Some(interval_val),
20635 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20636 unit: crate::expressions::IntervalUnit::Month,
20637 use_plural: false,
20638 }),
20639 }))
20640 };
20641
20642 // Build: date + interval
20643 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
20644 cast_date.clone(),
20645 add_interval.clone(),
20646 )));
20647
20648 // Build LAST_DAY(date)
20649 let last_day_date = Expression::Function(Box::new(Function::new(
20650 "LAST_DAY".to_string(),
20651 vec![cast_date.clone()],
20652 )));
20653
20654 // Build LAST_DAY(date + interval)
20655 let last_day_date_plus =
20656 Expression::Function(Box::new(Function::new(
20657 "LAST_DAY".to_string(),
20658 vec![date_plus_interval.clone()],
20659 )));
20660
20661 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20662 let case_expr = Expression::Case(Box::new(Case {
20663 operand: None,
20664 whens: vec![(
20665 Expression::Eq(Box::new(BinaryOp::new(
20666 last_day_date,
20667 cast_date.clone(),
20668 ))),
20669 last_day_date_plus,
20670 )],
20671 else_: Some(date_plus_interval),
20672 comments: Vec::new(),
20673 }));
20674
20675 // Wrap in CAST(... AS type) if needed
20676 if let Some(dt) = return_type {
20677 Ok(Expression::Cast(Box::new(Cast {
20678 this: case_expr,
20679 to: dt,
20680 trailing_comments: Vec::new(),
20681 double_colon_syntax: false,
20682 format: None,
20683 default: None,
20684 })))
20685 } else {
20686 Ok(case_expr)
20687 }
20688 }
20689 DialectType::DuckDB => {
20690 // Non-Snowflake source: simple date + INTERVAL
20691 let cast_date =
20692 if matches!(&date, Expression::Literal(Literal::String(_))) {
20693 Expression::Cast(Box::new(Cast {
20694 this: date,
20695 to: DataType::Timestamp {
20696 precision: None,
20697 timezone: false,
20698 },
20699 trailing_comments: Vec::new(),
20700 double_colon_syntax: false,
20701 format: None,
20702 default: None,
20703 }))
20704 } else {
20705 date
20706 };
20707 let interval =
20708 Expression::Interval(Box::new(crate::expressions::Interval {
20709 this: Some(val),
20710 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20711 unit: crate::expressions::IntervalUnit::Month,
20712 use_plural: false,
20713 }),
20714 }));
20715 Ok(Expression::Add(Box::new(BinaryOp::new(
20716 cast_date, interval,
20717 ))))
20718 }
20719 DialectType::Snowflake => {
20720 // Keep ADD_MONTHS when source is also Snowflake
20721 if matches!(source, DialectType::Snowflake) {
20722 Ok(Expression::Function(Box::new(Function::new(
20723 "ADD_MONTHS".to_string(),
20724 vec![date, val],
20725 ))))
20726 } else {
20727 Ok(Expression::Function(Box::new(Function::new(
20728 "DATEADD".to_string(),
20729 vec![
20730 Expression::Identifier(Identifier::new("MONTH")),
20731 val,
20732 date,
20733 ],
20734 ))))
20735 }
20736 }
20737 DialectType::Redshift => {
20738 Ok(Expression::Function(Box::new(Function::new(
20739 "DATEADD".to_string(),
20740 vec![
20741 Expression::Identifier(Identifier::new("MONTH")),
20742 val,
20743 date,
20744 ],
20745 ))))
20746 }
20747 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20748 let cast_date =
20749 if matches!(&date, Expression::Literal(Literal::String(_))) {
20750 Expression::Cast(Box::new(Cast {
20751 this: date,
20752 to: DataType::Timestamp {
20753 precision: None,
20754 timezone: false,
20755 },
20756 trailing_comments: Vec::new(),
20757 double_colon_syntax: false,
20758 format: None,
20759 default: None,
20760 }))
20761 } else {
20762 date
20763 };
20764 Ok(Expression::Function(Box::new(Function::new(
20765 "DATE_ADD".to_string(),
20766 vec![Expression::string("MONTH"), val, cast_date],
20767 ))))
20768 }
20769 DialectType::BigQuery => {
20770 let interval =
20771 Expression::Interval(Box::new(crate::expressions::Interval {
20772 this: Some(val),
20773 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20774 unit: crate::expressions::IntervalUnit::Month,
20775 use_plural: false,
20776 }),
20777 }));
20778 let cast_date =
20779 if matches!(&date, Expression::Literal(Literal::String(_))) {
20780 Expression::Cast(Box::new(Cast {
20781 this: date,
20782 to: DataType::Custom {
20783 name: "DATETIME".to_string(),
20784 },
20785 trailing_comments: Vec::new(),
20786 double_colon_syntax: false,
20787 format: None,
20788 default: None,
20789 }))
20790 } else {
20791 date
20792 };
20793 Ok(Expression::Function(Box::new(Function::new(
20794 "DATE_ADD".to_string(),
20795 vec![cast_date, interval],
20796 ))))
20797 }
20798 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20799 Ok(Expression::Function(Box::new(Function::new(
20800 "ADD_MONTHS".to_string(),
20801 vec![date, val],
20802 ))))
20803 }
20804 _ => {
20805 // Default: keep as AddMonths expression
20806 Ok(Expression::AddMonths(Box::new(
20807 crate::expressions::BinaryFunc {
20808 this: date,
20809 expression: val,
20810 original_name: None,
20811 },
20812 )))
20813 }
20814 }
20815 } else {
20816 Ok(e)
20817 }
20818 }
20819
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                // Missing percentile argument defaults to the median.
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
            // Anything else under WITHIN GROUP is left untouched.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // NOTE(review): `_is_disc` is computed but unused — PERCENTILE_DISC is
        // mapped to the same approximate function as PERCENTILE_CONT here.
        let col = wg
            .order_by
            .first()
            .map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                "APPROX_PERCENTILE"
            }
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
20864
20865 Action::CurrentUserSparkParens => {
20866 // CURRENT_USER -> CURRENT_USER() for Spark
20867 if let Expression::CurrentUser(_) = e {
20868 Ok(Expression::Function(Box::new(Function::new(
20869 "CURRENT_USER".to_string(),
20870 vec![],
20871 ))))
20872 } else {
20873 Ok(e)
20874 }
20875 }
20876
Action::SparkDateFuncCast => {
    // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
    // so targets that don't implicitly coerce strings still work.
    let cast_arg = |arg: Expression| -> Expression {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto-family needs a double cast: CAST(CAST(x AS TIMESTAMP) AS DATE).
                Self::double_cast_timestamp_date(arg)
            }
            _ => {
                // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                Self::ensure_cast_date(arg)
            }
        }
    };
    match e {
        Expression::Month(f) => Ok(Expression::Month(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Year(f) => Ok(Expression::Year(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Day(f) => Ok(Expression::Day(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        // Any other node: pass through unchanged.
        other => Ok(other),
    }
}
20903
20904 Action::MapFromArraysConvert => {
20905 // Expression::MapFromArrays -> target-specific
20906 if let Expression::MapFromArrays(mfa) = e {
20907 let keys = mfa.this;
20908 let values = mfa.expression;
20909 match target {
20910 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20911 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
20912 ))),
20913 _ => {
20914 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
20915 Ok(Expression::Function(Box::new(Function::new(
20916 "MAP".to_string(),
20917 vec![keys, values],
20918 ))))
20919 }
20920 }
20921 } else {
20922 Ok(e)
20923 }
20924 }
20925
Action::AnyToExists => {
    // `expr <op> ANY (array)` -> EXISTS(array, x -> expr <op> x), the
    // lambda-based form used by engines with higher-order functions.
    if let Expression::Any(q) = e {
        if let Some(op) = q.op.clone() {
            // NOTE(review): the lambda parameter is hard-coded to `x`; confirm
            // upstream that this cannot shadow a column named `x` in `q.this`.
            let lambda_param = crate::expressions::Identifier::new("x");
            let rhs = Expression::Identifier(lambda_param.clone());
            // Map the quantified operator onto the equivalent binary node.
            let body = match op {
                crate::expressions::QuantifiedOp::Eq => {
                    Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Neq => {
                    Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lt => {
                    Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lte => {
                    Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gt => {
                    Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gte => {
                    Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                }
            };
            let lambda =
                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![lambda_param],
                    body,
                    colon: false,
                    parameter_types: Vec::new(),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "EXISTS".to_string(),
                vec![q.subquery, lambda],
            ))))
        } else {
            // No operator recorded: leave the ANY node untouched.
            Ok(Expression::Any(q))
        }
    } else {
        Ok(e)
    }
}
20969
Action::GenerateSeriesConvert => {
    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
            let start = f.args[0].clone();
            let end = f.args[1].clone();
            let step = f.args.get(2).cloned();

            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
            let step = step.map(|s| Self::normalize_interval_string(s, target));

            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                if matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Hive
                ) {
                    match &arg {
                        Expression::CurrentTimestamp(_) => {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        _ => arg,
                    }
                } else {
                    arg
                }
            };

            let start = maybe_cast_timestamp(start);
            let end = maybe_cast_timestamp(end);

            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                return Ok(Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                ))));
            }

            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
            if matches!(target, DialectType::DuckDB) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                let gs = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                )));
                return Ok(Expression::Function(Box::new(Function::new(
                    "UNNEST".to_string(),
                    vec![gs],
                ))));
            }

            // Remaining targets get SEQUENCE(start, end[, step]) as the core.
            let mut seq_args = vec![start, end];
            if let Some(step) = step {
                seq_args.push(step);
            }

            let seq = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(),
                seq_args,
            )));

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Wrap in UNNEST
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNNEST".to_string(),
                        vec![seq],
                    ))))
                }
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => {
                    // Wrap in EXPLODE
                    Ok(Expression::Function(Box::new(Function::new(
                        "EXPLODE".to_string(),
                        vec![seq],
                    ))))
                }
                _ => {
                    // Just SEQUENCE for others
                    Ok(seq)
                }
            }
        } else {
            // Wrong name or fewer than two args: leave the call unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21085
Action::ConcatCoalesceWrap => {
    // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
    // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
    // Emulates NULL-tolerant CONCAT semantics by defaulting NULL args to ''.
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("CONCAT") {
            let new_args: Vec<Expression> = f
                .args
                .into_iter()
                .map(|arg| {
                    // Presto-family CONCAT requires VARCHAR operands, so each
                    // argument is cast before the COALESCE wrap.
                    let cast_arg = if matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                    ) {
                        Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        arg
                    };
                    Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![cast_arg, Expression::string("")],
                    )))
                })
                .collect();
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                new_args,
            ))))
        } else {
            // Not a CONCAT call: leave unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21132
21133 Action::PipeConcatToConcat => {
21134 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21135 if let Expression::Concat(op) = e {
21136 let cast_left = Expression::Cast(Box::new(Cast {
21137 this: op.left,
21138 to: DataType::VarChar {
21139 length: None,
21140 parenthesized_length: false,
21141 },
21142 trailing_comments: Vec::new(),
21143 double_colon_syntax: false,
21144 format: None,
21145 default: None,
21146 }));
21147 let cast_right = Expression::Cast(Box::new(Cast {
21148 this: op.right,
21149 to: DataType::VarChar {
21150 length: None,
21151 parenthesized_length: false,
21152 },
21153 trailing_comments: Vec::new(),
21154 double_colon_syntax: false,
21155 format: None,
21156 default: None,
21157 }));
21158 Ok(Expression::Function(Box::new(Function::new(
21159 "CONCAT".to_string(),
21160 vec![cast_left, cast_right],
21161 ))))
21162 } else {
21163 Ok(e)
21164 }
21165 }
21166
Action::DivFuncConvert => {
    // DIV(a, b) -> target-specific integer division
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
            let a = f.args[0].clone();
            let b = f.args[1].clone();
            match target {
                DialectType::DuckDB => {
                    // DIV(a, b) -> CAST(a // b AS DECIMAL)
                    let int_div = Expression::IntDiv(Box::new(
                        crate::expressions::BinaryFunc {
                            this: a,
                            expression: b,
                            original_name: None,
                        },
                    ));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: int_div,
                        to: DataType::Decimal {
                            precision: None,
                            scale: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
                DialectType::BigQuery => {
                    // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                    let div_func = Expression::Function(Box::new(Function::new(
                        "DIV".to_string(),
                        vec![a, b],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: div_func,
                        to: DataType::Custom {
                            name: "NUMERIC".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
                DialectType::SQLite => {
                    // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                    // SQLite has no integer-division operator; the inner REAL
                    // cast forces real division, the INTEGER cast truncates,
                    // and the outer REAL cast restores a numeric result type.
                    let cast_a = Expression::Cast(Box::new(Cast {
                        this: a,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                    let cast_int = Expression::Cast(Box::new(Cast {
                        this: div,
                        to: DataType::Int {
                            length: None,
                            integer_spelling: true,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: cast_int,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
                // Other targets keep DIV(a, b) as-is.
                _ => Ok(Expression::Function(f)),
            }
        } else {
            // Not a two-argument DIV call: leave unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21256
21257 Action::JsonObjectAggConvert => {
21258 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21259 match e {
21260 Expression::Function(f) => Ok(Expression::Function(Box::new(
21261 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21262 ))),
21263 Expression::AggregateFunction(af) => {
21264 // AggregateFunction stores all args in the `args` vec
21265 Ok(Expression::Function(Box::new(Function::new(
21266 "JSON_GROUP_OBJECT".to_string(),
21267 af.args,
21268 ))))
21269 }
21270 other => Ok(other),
21271 }
21272 }
21273
Action::JsonbExistsConvert => {
    // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let json_expr = f.args[0].clone();
            // Only a literal string key can be rewritten into a JSONPath;
            // a column or expression key leaves the call untouched.
            let key = match &f.args[1] {
                Expression::Literal(crate::expressions::Literal::String(s)) => {
                    format!("$.{}", s)
                }
                _ => return Ok(Expression::Function(f)),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXISTS".to_string(),
                vec![json_expr, Expression::string(&key)],
            ))))
        } else {
            // Wrong arity: leave unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21296
21297 Action::DateBinConvert => {
21298 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21299 if let Expression::Function(f) = e {
21300 Ok(Expression::Function(Box::new(Function::new(
21301 "TIME_BUCKET".to_string(),
21302 f.args,
21303 ))))
21304 } else {
21305 Ok(e)
21306 }
21307 }
21308
21309 Action::MysqlCastCharToText => {
21310 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21311 if let Expression::Cast(mut c) = e {
21312 c.to = DataType::Text;
21313 Ok(Expression::Cast(c))
21314 } else {
21315 Ok(e)
21316 }
21317 }
21318
21319 Action::SparkCastVarcharToString => {
21320 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21321 match e {
21322 Expression::Cast(mut c) => {
21323 c.to = Self::normalize_varchar_to_string(c.to);
21324 Ok(Expression::Cast(c))
21325 }
21326 Expression::TryCast(mut c) => {
21327 c.to = Self::normalize_varchar_to_string(c.to);
21328 Ok(Expression::TryCast(c))
21329 }
21330 _ => Ok(e),
21331 }
21332 }
21333
21334 Action::MinMaxToLeastGreatest => {
21335 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21336 if let Expression::Function(f) = e {
21337 let name = f.name.to_uppercase();
21338 let new_name = match name.as_str() {
21339 "MIN" => "LEAST",
21340 "MAX" => "GREATEST",
21341 _ => return Ok(Expression::Function(f)),
21342 };
21343 Ok(Expression::Function(Box::new(Function::new(
21344 new_name.to_string(),
21345 f.args,
21346 ))))
21347 } else {
21348 Ok(e)
21349 }
21350 }
21351
21352 Action::ClickHouseUniqToApproxCountDistinct => {
21353 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21354 if let Expression::Function(f) = e {
21355 Ok(Expression::Function(Box::new(Function::new(
21356 "APPROX_COUNT_DISTINCT".to_string(),
21357 f.args,
21358 ))))
21359 } else {
21360 Ok(e)
21361 }
21362 }
21363
21364 Action::ClickHouseAnyToAnyValue => {
21365 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21366 if let Expression::Function(f) = e {
21367 Ok(Expression::Function(Box::new(Function::new(
21368 "ANY_VALUE".to_string(),
21369 f.args,
21370 ))))
21371 } else {
21372 Ok(e)
21373 }
21374 }
21375
21376 Action::OracleVarchar2ToVarchar => {
21377 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21378 if let Expression::DataType(DataType::Custom { ref name }) = e {
21379 let upper = name.to_uppercase();
21380 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21381 let inner =
21382 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21383 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21384 let end = name.len() - 1; // skip trailing ")"
21385 Some(&name[start..end])
21386 } else {
21387 Option::None
21388 };
21389 if let Some(inner_str) = inner {
21390 // Parse the number part, ignoring BYTE/CHAR qualifier
21391 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21392 if let Ok(n) = num_str.parse::<u32>() {
21393 Ok(Expression::DataType(DataType::VarChar {
21394 length: Some(n),
21395 parenthesized_length: false,
21396 }))
21397 } else {
21398 Ok(e)
21399 }
21400 } else {
21401 // Plain VARCHAR2 / NVARCHAR2 without parens
21402 Ok(Expression::DataType(DataType::VarChar {
21403 length: Option::None,
21404 parenthesized_length: false,
21405 }))
21406 }
21407 } else {
21408 Ok(e)
21409 }
21410 }
21411
Action::Nvl2Expand => {
    // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
    // But keep as NVL2 for dialects that support it natively
    let nvl2_native = matches!(
        target,
        DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Spark
            | DialectType::Databricks
    );
    // Accept either a dedicated Nvl2 node or a generic NVL2(...) function
    // call; extract (operand, then-value, optional else-value) from either.
    let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
        if nvl2_native {
            // Native target: keep the dedicated node as-is.
            return Ok(Expression::Nvl2(nvl2));
        }
        (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
    } else if let Expression::Function(f) = e {
        if nvl2_native {
            // Native target: re-emit under the canonical NVL2 spelling.
            return Ok(Expression::Function(Box::new(Function::new(
                "NVL2".to_string(),
                f.args,
            ))));
        }
        if f.args.len() < 2 {
            // Too few arguments to expand: pass the call through untouched.
            return Ok(Expression::Function(f));
        }
        let mut args = f.args;
        let a = args.remove(0);
        let b = args.remove(0);
        // The third argument (ELSE value) is optional.
        let c = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };
        (a, b, c)
    } else {
        return Ok(e);
    };
    // Build: NOT (a IS NULL)
    let is_null = Expression::IsNull(Box::new(IsNull {
        this: a,
        not: false,
        postfix_form: false,
    }));
    let not_null =
        Expression::Not(Box::new(crate::expressions::UnaryOp { this: is_null }));
    // CASE WHEN NOT a IS NULL THEN b [ELSE c] END — `c` being None simply
    // omits the ELSE branch.
    Ok(Expression::Case(Box::new(Case {
        operand: Option::None,
        whens: vec![(not_null, b)],
        else_: c,
        comments: Vec::new(),
    })))
}
21466
21467 Action::IfnullToCoalesce => {
21468 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
21469 if let Expression::Coalesce(mut cf) = e {
21470 cf.original_name = Option::None;
21471 Ok(Expression::Coalesce(cf))
21472 } else if let Expression::Function(f) = e {
21473 Ok(Expression::Function(Box::new(Function::new(
21474 "COALESCE".to_string(),
21475 f.args,
21476 ))))
21477 } else {
21478 Ok(e)
21479 }
21480 }
21481
21482 Action::IsAsciiConvert => {
21483 // IS_ASCII(x) -> dialect-specific ASCII check
21484 if let Expression::Function(f) = e {
21485 let arg = f.args.into_iter().next().unwrap();
21486 match target {
21487 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
21488 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
21489 Ok(Expression::Function(Box::new(Function::new(
21490 "REGEXP_LIKE".to_string(),
21491 vec![
21492 arg,
21493 Expression::Literal(Literal::String(
21494 "^[[:ascii:]]*$".to_string(),
21495 )),
21496 ],
21497 ))))
21498 }
21499 DialectType::PostgreSQL
21500 | DialectType::Redshift
21501 | DialectType::Materialize
21502 | DialectType::RisingWave => {
21503 // (x ~ '^[[:ascii:]]*$')
21504 Ok(Expression::Paren(Box::new(Paren {
21505 this: Expression::RegexpLike(Box::new(
21506 crate::expressions::RegexpFunc {
21507 this: arg,
21508 pattern: Expression::Literal(Literal::String(
21509 "^[[:ascii:]]*$".to_string(),
21510 )),
21511 flags: Option::None,
21512 },
21513 )),
21514 trailing_comments: Vec::new(),
21515 })))
21516 }
21517 DialectType::SQLite => {
21518 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
21519 let hex_lit = Expression::Literal(Literal::HexString(
21520 "2a5b5e012d7f5d2a".to_string(),
21521 ));
21522 let cast_expr = Expression::Cast(Box::new(Cast {
21523 this: hex_lit,
21524 to: DataType::Text,
21525 trailing_comments: Vec::new(),
21526 double_colon_syntax: false,
21527 format: Option::None,
21528 default: Option::None,
21529 }));
21530 let glob = Expression::Glob(Box::new(BinaryOp {
21531 left: arg,
21532 right: cast_expr,
21533 left_comments: Vec::new(),
21534 operator_comments: Vec::new(),
21535 trailing_comments: Vec::new(),
21536 }));
21537 Ok(Expression::Paren(Box::new(Paren {
21538 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
21539 this: glob,
21540 })),
21541 trailing_comments: Vec::new(),
21542 })))
21543 }
21544 DialectType::TSQL | DialectType::Fabric => {
21545 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
21546 let hex_lit = Expression::Literal(Literal::HexNumber(
21547 "255b5e002d7f5d25".to_string(),
21548 ));
21549 let convert_expr = Expression::Convert(Box::new(
21550 crate::expressions::ConvertFunc {
21551 this: hex_lit,
21552 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
21553 style: None,
21554 },
21555 ));
21556 let collated = Expression::Collation(Box::new(
21557 crate::expressions::CollationExpr {
21558 this: convert_expr,
21559 collation: "Latin1_General_BIN".to_string(),
21560 quoted: false,
21561 double_quoted: false,
21562 },
21563 ));
21564 let patindex = Expression::Function(Box::new(Function::new(
21565 "PATINDEX".to_string(),
21566 vec![collated, arg],
21567 )));
21568 let zero = Expression::Literal(Literal::Number("0".to_string()));
21569 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21570 left: patindex,
21571 right: zero,
21572 left_comments: Vec::new(),
21573 operator_comments: Vec::new(),
21574 trailing_comments: Vec::new(),
21575 }));
21576 Ok(Expression::Paren(Box::new(Paren {
21577 this: eq_zero,
21578 trailing_comments: Vec::new(),
21579 })))
21580 }
21581 DialectType::Oracle => {
21582 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
21583 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21584 let s1 = Expression::Literal(Literal::String("^[".to_string()));
21585 let chr1 = Expression::Function(Box::new(Function::new(
21586 "CHR".to_string(),
21587 vec![Expression::Literal(Literal::Number("1".to_string()))],
21588 )));
21589 let dash = Expression::Literal(Literal::String("-".to_string()));
21590 let chr127 = Expression::Function(Box::new(Function::new(
21591 "CHR".to_string(),
21592 vec![Expression::Literal(Literal::Number("127".to_string()))],
21593 )));
21594 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
21595 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21596 let concat1 =
21597 Expression::DPipe(Box::new(crate::expressions::DPipe {
21598 this: Box::new(s1),
21599 expression: Box::new(chr1),
21600 safe: None,
21601 }));
21602 let concat2 =
21603 Expression::DPipe(Box::new(crate::expressions::DPipe {
21604 this: Box::new(concat1),
21605 expression: Box::new(dash),
21606 safe: None,
21607 }));
21608 let concat3 =
21609 Expression::DPipe(Box::new(crate::expressions::DPipe {
21610 this: Box::new(concat2),
21611 expression: Box::new(chr127),
21612 safe: None,
21613 }));
21614 let concat4 =
21615 Expression::DPipe(Box::new(crate::expressions::DPipe {
21616 this: Box::new(concat3),
21617 expression: Box::new(s2),
21618 safe: None,
21619 }));
21620 let regexp_like = Expression::Function(Box::new(Function::new(
21621 "REGEXP_LIKE".to_string(),
21622 vec![arg, concat4],
21623 )));
21624 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
21625 let true_expr = Expression::Column(crate::expressions::Column {
21626 name: Identifier {
21627 name: "TRUE".to_string(),
21628 quoted: false,
21629 trailing_comments: Vec::new(),
21630 },
21631 table: None,
21632 join_mark: false,
21633 trailing_comments: Vec::new(),
21634 });
21635 let nvl = Expression::Function(Box::new(Function::new(
21636 "NVL".to_string(),
21637 vec![regexp_like, true_expr],
21638 )));
21639 Ok(nvl)
21640 }
21641 _ => Ok(Expression::Function(Box::new(Function::new(
21642 "IS_ASCII".to_string(),
21643 vec![arg],
21644 )))),
21645 }
21646 } else {
21647 Ok(e)
21648 }
21649 }
21650
Action::StrPositionConvert => {
    // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
    if let Expression::Function(f) = e {
        if f.args.len() < 2 {
            // Need at least haystack + needle; otherwise pass through.
            return Ok(Expression::Function(f));
        }
        let mut args = f.args;

        // Positional extraction: the 3rd and 4th arguments (start position
        // and occurrence count) are both optional.
        let haystack = args.remove(0);
        let needle = args.remove(0);
        let position = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };
        let occurrence = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };

        // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
        // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
        // The `+ pos - 1` shifts the offset found inside the substring back
        // into coordinates of the full haystack (positions are 1-based).
        fn build_position_expansion(
            haystack: Expression,
            needle: Expression,
            pos: Expression,
            occurrence: Option<Expression>,
            inner_func: &str,
            wrapper: &str, // "CASE", "IF", "IIF"
        ) -> Expression {
            let substr = Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![haystack, pos.clone()],
            )));
            let mut inner_args = vec![substr, needle];
            if let Some(occ) = occurrence {
                inner_args.push(occ);
            }
            let inner_call = Expression::Function(Box::new(Function::new(
                inner_func.to_string(),
                inner_args,
            )));
            let zero = Expression::Literal(Literal::Number("0".to_string()));
            let one = Expression::Literal(Literal::Number("1".to_string()));
            // inner_call = 0  (no match found in the substring)
            let eq_zero = Expression::Eq(Box::new(BinaryOp {
                left: inner_call.clone(),
                right: zero.clone(),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }));
            // inner_call + pos - 1  (translate back to full-string offset)
            let add_pos = Expression::Add(Box::new(BinaryOp {
                left: inner_call,
                right: pos,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }));
            let sub_one = Expression::Sub(Box::new(BinaryOp {
                left: add_pos,
                right: one,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }));

            // Wrap the conditional in the construct the target understands.
            match wrapper {
                "CASE" => Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(eq_zero, zero)],
                    else_: Some(sub_one),
                    comments: Vec::new(),
                })),
                "IIF" => Expression::Function(Box::new(Function::new(
                    "IIF".to_string(),
                    vec![eq_zero, zero, sub_one],
                ))),
                _ => Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![eq_zero, zero, sub_one],
                ))),
            }
        }

        match target {
            // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
            DialectType::Athena
            | DialectType::DuckDB
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Drill => {
                if let Some(pos) = position {
                    // DuckDB wraps in CASE; the others use IF.
                    let wrapper = if matches!(target, DialectType::DuckDB) {
                        "CASE"
                    } else {
                        "IF"
                    };
                    let result = build_position_expansion(
                        haystack, needle, pos, occurrence, "STRPOS", wrapper,
                    );
                    if matches!(target, DialectType::Drill) {
                        // Drill uses backtick-quoted `IF`
                        if let Expression::Function(mut f) = result {
                            f.name = "`IF`".to_string();
                            Ok(Expression::Function(f))
                        } else {
                            Ok(result)
                        }
                    } else {
                        Ok(result)
                    }
                } else {
                    // No start position: simple STRPOS(haystack, needle).
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![haystack, needle],
                    ))))
                }
            }
            // SQLite: IIF wrapper
            DialectType::SQLite => {
                if let Some(pos) = position {
                    Ok(build_position_expansion(
                        haystack, needle, pos, occurrence, "INSTR", "IIF",
                    ))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        vec![haystack, needle],
                    ))))
                }
            }
            // INSTR group: Teradata, BigQuery, Oracle
            // These accept both position and occurrence natively.
            DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                if let Some(occ) = occurrence {
                    a.push(occ);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "INSTR".to_string(),
                    a,
                ))))
            }
            // CHARINDEX group: Snowflake, TSQL
            // Note the swapped argument order (needle first); any occurrence
            // argument is dropped here because CHARINDEX takes none.
            DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                let mut a = vec![needle, haystack];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "CHARINDEX".to_string(),
                    a,
                ))))
            }
            // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
            DialectType::PostgreSQL
            | DialectType::Materialize
            | DialectType::RisingWave
            | DialectType::Redshift => {
                if let Some(pos) = position {
                    // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                    // ELSE POSITION(...) + pos - 1 END
                    let substr = Expression::Substring(Box::new(
                        crate::expressions::SubstringFunc {
                            this: haystack,
                            start: pos.clone(),
                            length: Option::None,
                            from_for_syntax: true,
                        },
                    ));
                    let pos_in = Expression::StrPosition(Box::new(
                        crate::expressions::StrPosition {
                            this: Box::new(substr),
                            substr: Some(Box::new(needle)),
                            position: Option::None,
                            occurrence: Option::None,
                        },
                    ));
                    let zero =
                        Expression::Literal(Literal::Number("0".to_string()));
                    let one = Expression::Literal(Literal::Number("1".to_string()));
                    let eq_zero = Expression::Eq(Box::new(BinaryOp {
                        left: pos_in.clone(),
                        right: zero.clone(),
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    // Shift the substring-relative offset back by pos - 1.
                    let add_pos = Expression::Add(Box::new(BinaryOp {
                        left: pos_in,
                        right: pos,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    let sub_one = Expression::Sub(Box::new(BinaryOp {
                        left: add_pos,
                        right: one,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    Ok(Expression::Case(Box::new(Case {
                        operand: Option::None,
                        whens: vec![(eq_zero, zero)],
                        else_: Some(sub_one),
                        comments: Vec::new(),
                    })))
                } else {
                    // No start position: plain POSITION(needle IN haystack).
                    Ok(Expression::StrPosition(Box::new(
                        crate::expressions::StrPosition {
                            this: Box::new(haystack),
                            substr: Some(Box::new(needle)),
                            position: Option::None,
                            occurrence: Option::None,
                        },
                    )))
                }
            }
            // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
            // Needle-first order; occurrence is not supported and is dropped.
            DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::TiDB
            | DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
            | DialectType::Doris
            | DialectType::StarRocks => {
                let mut a = vec![needle, haystack];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "LOCATE".to_string(),
                    a,
                ))))
            }
            // ClickHouse: POSITION(haystack, needle[, position])
            DialectType::ClickHouse => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "POSITION".to_string(),
                    a,
                ))))
            }
            // Fallback: keep STR_POSITION with whatever args were supplied.
            _ => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                if let Some(occ) = occurrence {
                    a.push(occ);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "STR_POSITION".to_string(),
                    a,
                ))))
            }
        }
    } else {
        Ok(e)
    }
}
21920
21921 Action::ArraySumConvert => {
21922 // ARRAY_SUM(arr) -> dialect-specific
21923 if let Expression::Function(f) = e {
21924 let args = f.args;
21925 match target {
21926 DialectType::DuckDB => Ok(Expression::Function(Box::new(
21927 Function::new("LIST_SUM".to_string(), args),
21928 ))),
21929 DialectType::Spark | DialectType::Databricks => {
21930 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
21931 let arr = args.into_iter().next().unwrap();
21932 let zero = Expression::Literal(Literal::Number("0".to_string()));
21933 let acc_id = Identifier::new("acc");
21934 let x_id = Identifier::new("x");
21935 let acc = Expression::Identifier(acc_id.clone());
21936 let x = Expression::Identifier(x_id.clone());
21937 let add = Expression::Add(Box::new(BinaryOp {
21938 left: acc.clone(),
21939 right: x,
21940 left_comments: Vec::new(),
21941 operator_comments: Vec::new(),
21942 trailing_comments: Vec::new(),
21943 }));
21944 let lambda1 =
21945 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21946 parameters: vec![acc_id.clone(), x_id],
21947 body: add,
21948 colon: false,
21949 parameter_types: Vec::new(),
21950 }));
21951 let lambda2 =
21952 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21953 parameters: vec![acc_id],
21954 body: acc,
21955 colon: false,
21956 parameter_types: Vec::new(),
21957 }));
21958 Ok(Expression::Function(Box::new(Function::new(
21959 "AGGREGATE".to_string(),
21960 vec![arr, zero, lambda1, lambda2],
21961 ))))
21962 }
21963 DialectType::Presto | DialectType::Athena => {
21964 // Presto/Athena keep ARRAY_SUM natively
21965 Ok(Expression::Function(Box::new(Function::new(
21966 "ARRAY_SUM".to_string(),
21967 args,
21968 ))))
21969 }
21970 DialectType::Trino => {
21971 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
21972 if args.len() == 1 {
21973 let arr = args.into_iter().next().unwrap();
21974 let zero =
21975 Expression::Literal(Literal::Number("0".to_string()));
21976 let acc_id = Identifier::new("acc");
21977 let x_id = Identifier::new("x");
21978 let acc = Expression::Identifier(acc_id.clone());
21979 let x = Expression::Identifier(x_id.clone());
21980 let add = Expression::Add(Box::new(BinaryOp {
21981 left: acc.clone(),
21982 right: x,
21983 left_comments: Vec::new(),
21984 operator_comments: Vec::new(),
21985 trailing_comments: Vec::new(),
21986 }));
21987 let lambda1 = Expression::Lambda(Box::new(
21988 crate::expressions::LambdaExpr {
21989 parameters: vec![acc_id.clone(), x_id],
21990 body: add,
21991 colon: false,
21992 parameter_types: Vec::new(),
21993 },
21994 ));
21995 let lambda2 = Expression::Lambda(Box::new(
21996 crate::expressions::LambdaExpr {
21997 parameters: vec![acc_id],
21998 body: acc,
21999 colon: false,
22000 parameter_types: Vec::new(),
22001 },
22002 ));
22003 Ok(Expression::Function(Box::new(Function::new(
22004 "REDUCE".to_string(),
22005 vec![arr, zero, lambda1, lambda2],
22006 ))))
22007 } else {
22008 Ok(Expression::Function(Box::new(Function::new(
22009 "ARRAY_SUM".to_string(),
22010 args,
22011 ))))
22012 }
22013 }
22014 DialectType::ClickHouse => {
22015 // arraySum(lambda, arr) or arraySum(arr)
22016 Ok(Expression::Function(Box::new(Function::new(
22017 "arraySum".to_string(),
22018 args,
22019 ))))
22020 }
22021 _ => Ok(Expression::Function(Box::new(Function::new(
22022 "ARRAY_SUM".to_string(),
22023 args,
22024 )))),
22025 }
22026 } else {
22027 Ok(e)
22028 }
22029 }
22030
22031 Action::ArraySizeConvert => {
22032 if let Expression::Function(f) = e {
22033 Ok(Expression::Function(Box::new(Function::new(
22034 "REPEATED_COUNT".to_string(),
22035 f.args,
22036 ))))
22037 } else {
22038 Ok(e)
22039 }
22040 }
22041
Action::ArrayAnyConvert => {
    // ARRAY_ANY(arr, lambda): "does any element satisfy the predicate?".
    // Expanded per-dialect; most targets get the pattern
    // (len(arr) = 0 OR len(filter(arr, lambda)) <> 0).
    if let Expression::Function(f) = e {
        let mut args = f.args;
        if args.len() == 2 {
            let arr = args.remove(0);
            let lambda = args.remove(0);

            // Extract lambda parameter name and body
            // Falls back to parameter "x" and uses the raw expression as the
            // predicate body when the second argument is not a Lambda node.
            let (param_name, pred_body) =
                if let Expression::Lambda(ref lam) = lambda {
                    let name = if let Some(p) = lam.parameters.first() {
                        p.name.clone()
                    } else {
                        "x".to_string()
                    };
                    (name, lam.body.clone())
                } else {
                    ("x".to_string(), lambda.clone())
                };

            // Helper: build a function call Expression
            let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                )))
            };

            // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
            // `len_args_extra` carries trailing args the length function
            // needs (e.g. the dimension argument for PostgreSQL).
            let build_filter_pattern = |len_func: &str,
                                        len_args_extra: Vec<Expression>,
                                        filter_expr: Expression|
             -> Expression {
                // len_func(arr, ...extra) = 0
                let mut len_arr_args = vec![arr.clone()];
                len_arr_args.extend(len_args_extra.clone());
                let len_arr = make_func(len_func, len_arr_args);
                let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                    len_arr,
                    Expression::number(0),
                )));

                // len_func(filter_expr, ...extra) <> 0
                let mut len_filter_args = vec![filter_expr];
                len_filter_args.extend(len_args_extra);
                let len_filter = make_func(len_func, len_filter_args);
                let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                    len_filter,
                    Expression::number(0),
                )));

                // (eq_zero OR neq_zero)
                let or_expr =
                    Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                Expression::Paren(Box::new(Paren {
                    this: or_expr,
                    trailing_comments: Vec::new(),
                }))
            };

            match target {
                // These targets have a native any-match predicate.
                DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                    Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                }
                DialectType::ClickHouse => {
                    // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                    // ClickHouse arrayFilter takes lambda first, then array
                    let filter_expr =
                        make_func("arrayFilter", vec![lambda, arr.clone()]);
                    Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                }
                DialectType::Databricks | DialectType::Spark => {
                    // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                }
                DialectType::DuckDB => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                }
                DialectType::Teradata => {
                    // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                }
                DialectType::BigQuery => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                    let param_col = Expression::column(&param_name);
                    let unnest_expr = Expression::Unnest(Box::new(
                        crate::expressions::UnnestFunc {
                            this: arr.clone(),
                            expressions: vec![],
                            with_ordinality: false,
                            alias: Some(Identifier::new(&param_name)),
                            offset_alias: None,
                        },
                    ));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_expr],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                }
                DialectType::PostgreSQL => {
                    // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                    let param_col = Expression::column(&param_name);
                    // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                    let unnest_with_alias =
                        Expression::Alias(Box::new(crate::expressions::Alias {
                            this: Expression::Unnest(Box::new(
                                crate::expressions::UnnestFunc {
                                    this: arr.clone(),
                                    expressions: vec![],
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                },
                            )),
                            alias: Identifier::new("_t0"),
                            column_aliases: vec![Identifier::new(&param_name)],
                            pre_alias_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_with_alias],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    // PostgreSQL's ARRAY_LENGTH needs the dimension (1) argument.
                    Ok(build_filter_pattern(
                        "ARRAY_LENGTH",
                        vec![Expression::number(1)],
                        array_subquery,
                    ))
                }
                // Fallback: keep the ARRAY_ANY spelling.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_ANY".to_string(),
                    vec![arr, lambda],
                )))),
            }
        } else {
            // Unexpected arity: keep the call as-is under ARRAY_ANY.
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_ANY".to_string(),
                args,
            ))))
        }
    } else {
        Ok(e)
    }
}
22206
Action::DecodeSimplify => {
    // DECODE(x, search1, result1, ..., default) -> CASE WHEN x = search1 OR (x IS NULL AND search1 IS NULL) THEN result1 ... [ELSE default] END
    // Helper to build null-safe CASE from (this_expr, search_result_pairs, default)
    // The extra "(x IS NULL AND search IS NULL)" disjunct reproduces DECODE's
    // NULL-matches-NULL semantics, which plain equality would lose.
    let build_decode_case =
        |this_expr: Expression,
         pairs: Vec<(Expression, Expression)>,
         default: Option<Expression>| {
            let whens: Vec<(Expression, Expression)> = pairs
                .into_iter()
                .map(|(search, result)| {
                    // Wrap search in parens if it's a comparison expression
                    // so precedence survives inside the larger condition.
                    let needs_paren = matches!(
                        &search,
                        Expression::Eq(_)
                            | Expression::Neq(_)
                            | Expression::Gt(_)
                            | Expression::Gte(_)
                            | Expression::Lt(_)
                            | Expression::Lte(_)
                    );
                    let search_ref = if needs_paren {
                        Expression::Paren(Box::new(crate::expressions::Paren {
                            this: search.clone(),
                            trailing_comments: Vec::new(),
                        }))
                    } else {
                        search.clone()
                    };
                    // Build: x = search OR (x IS NULL AND search IS NULL)
                    let eq = Expression::Eq(Box::new(BinaryOp {
                        left: this_expr.clone(),
                        right: search_ref,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    // Separate (possibly parenthesized) copy of `search` for
                    // the IS NULL side of the condition.
                    let search_in_null = if needs_paren {
                        Expression::Paren(Box::new(crate::expressions::Paren {
                            this: search.clone(),
                            trailing_comments: Vec::new(),
                        }))
                    } else {
                        search.clone()
                    };
                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                        left: this_expr.clone(),
                        right: Expression::Null(crate::expressions::Null),
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                        left: search_in_null,
                        right: Expression::Null(crate::expressions::Null),
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    let both_null = Expression::And(Box::new(BinaryOp {
                        left: x_is_null,
                        right: search_is_null,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    // Full WHEN condition, with the AND half parenthesized.
                    let condition = Expression::Or(Box::new(BinaryOp {
                        left: eq,
                        right: Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: both_null,
                                trailing_comments: Vec::new(),
                            },
                        )),
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }));
                    (condition, result)
                })
                .collect();
            Expression::Case(Box::new(Case {
                operand: None,
                whens,
                else_: default,
                comments: Vec::new(),
            }))
        };

    if let Expression::Decode(decode) = e {
        // Structured Decode node: fields map directly onto the builder.
        Ok(build_decode_case(
            decode.this,
            decode.search_results,
            decode.default,
        ))
    } else if let Expression::DecodeCase(dc) = e {
        // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
        let mut exprs = dc.expressions;
        if exprs.len() < 3 {
            // Not enough elements for even one search/result pair: rebuild
            // the node unchanged.
            return Ok(Expression::DecodeCase(Box::new(
                crate::expressions::DecodeCase { expressions: exprs },
            )));
        }
        let this_expr = exprs.remove(0);
        let mut pairs = Vec::new();
        let mut default = None;
        let mut i = 0;
        // Pair up (search, result) two at a time.
        while i + 1 < exprs.len() {
            pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
            i += 2;
        }
        if i < exprs.len() {
            // Odd remaining element is the default
            default = Some(exprs[i].clone());
        }
        Ok(build_decode_case(this_expr, pairs, default))
    } else {
        Ok(e)
    }
}
22326
22327 Action::CreateTableLikeToCtas => {
22328 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
22329 if let Expression::CreateTable(ct) = e {
22330 let like_source = ct.constraints.iter().find_map(|c| {
22331 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22332 Some(source.clone())
22333 } else {
22334 None
22335 }
22336 });
22337 if let Some(source_table) = like_source {
22338 let mut new_ct = *ct;
22339 new_ct.constraints.clear();
22340 // Build: SELECT * FROM b LIMIT 0
22341 let select = Expression::Select(Box::new(crate::expressions::Select {
22342 expressions: vec![Expression::Star(crate::expressions::Star {
22343 table: None,
22344 except: None,
22345 replace: None,
22346 rename: None,
22347 trailing_comments: Vec::new(),
22348 })],
22349 from: Some(crate::expressions::From {
22350 expressions: vec![Expression::Table(source_table)],
22351 }),
22352 limit: Some(crate::expressions::Limit {
22353 this: Expression::Literal(Literal::Number("0".to_string())),
22354 percent: false,
22355 comments: Vec::new(),
22356 }),
22357 ..Default::default()
22358 }));
22359 new_ct.as_select = Some(select);
22360 Ok(Expression::CreateTable(Box::new(new_ct)))
22361 } else {
22362 Ok(Expression::CreateTable(ct))
22363 }
22364 } else {
22365 Ok(e)
22366 }
22367 }
22368
Action::CreateTableLikeToSelectInto => {
    // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
    // (T-SQL shape: SELECT ... INTO creates the target table from the
    // SELECT's schema; TOP 0 ensures no rows are copied.)
    if let Expression::CreateTable(ct) = e {
        // Extract the source table from a LIKE table constraint, if present.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            // Alias the source as "temp" so the emitted SQL reads
            // `FROM b AS temp`.
            let mut aliased_source = source_table;
            aliased_source.alias = Some(Identifier::new("temp"));
            // Build: SELECT TOP 0 * INTO a FROM b AS temp
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(aliased_source)],
                }),
                // INTO <new table>: reuses the name from the original DDL.
                into: Some(crate::expressions::SelectInto {
                    this: Expression::Table(ct.name.clone()),
                    temporary: false,
                    unlogged: false,
                    bulk_collect: false,
                    expressions: Vec::new(),
                }),
                // TOP 0: schema only, zero rows.
                top: Some(crate::expressions::Top {
                    this: Expression::Literal(Literal::Number("0".to_string())),
                    percent: false,
                    with_ties: false,
                    parenthesized: false,
                }),
                ..Default::default()
            }));
            // NOTE: the CREATE TABLE node is replaced entirely by the SELECT;
            // any other constraints on `ct` are dropped with it.
            Ok(select)
        } else {
            // No LIKE constraint: leave the CREATE TABLE untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
22417
22418 Action::CreateTableLikeToAs => {
22419 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
22420 if let Expression::CreateTable(ct) = e {
22421 let like_source = ct.constraints.iter().find_map(|c| {
22422 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22423 Some(source.clone())
22424 } else {
22425 None
22426 }
22427 });
22428 if let Some(source_table) = like_source {
22429 let mut new_ct = *ct;
22430 new_ct.constraints.clear();
22431 // AS b (just a table reference, not a SELECT)
22432 new_ct.as_select = Some(Expression::Table(source_table));
22433 Ok(Expression::CreateTable(Box::new(new_ct)))
22434 } else {
22435 Ok(Expression::CreateTable(ct))
22436 }
22437 } else {
22438 Ok(e)
22439 }
22440 }
22441
22442 Action::TsOrDsToDateConvert => {
22443 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
22444 if let Expression::Function(f) = e {
22445 let mut args = f.args;
22446 let this = args.remove(0);
22447 let fmt = if !args.is_empty() {
22448 match &args[0] {
22449 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22450 _ => None,
22451 }
22452 } else {
22453 None
22454 };
22455 Ok(Expression::TsOrDsToDate(Box::new(
22456 crate::expressions::TsOrDsToDate {
22457 this: Box::new(this),
22458 format: fmt,
22459 safe: None,
22460 },
22461 )))
22462 } else {
22463 Ok(e)
22464 }
22465 }
22466
22467 Action::TsOrDsToDateStrConvert => {
22468 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
22469 if let Expression::Function(f) = e {
22470 let arg = f.args.into_iter().next().unwrap();
22471 let str_type = match target {
22472 DialectType::DuckDB
22473 | DialectType::PostgreSQL
22474 | DialectType::Materialize => DataType::Text,
22475 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22476 DataType::Custom {
22477 name: "STRING".to_string(),
22478 }
22479 }
22480 DialectType::Presto
22481 | DialectType::Trino
22482 | DialectType::Athena
22483 | DialectType::Drill => DataType::VarChar {
22484 length: None,
22485 parenthesized_length: false,
22486 },
22487 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
22488 DataType::Custom {
22489 name: "STRING".to_string(),
22490 }
22491 }
22492 _ => DataType::VarChar {
22493 length: None,
22494 parenthesized_length: false,
22495 },
22496 };
22497 let cast_expr = Expression::Cast(Box::new(Cast {
22498 this: arg,
22499 to: str_type,
22500 double_colon_syntax: false,
22501 trailing_comments: Vec::new(),
22502 format: None,
22503 default: None,
22504 }));
22505 Ok(Expression::Substring(Box::new(
22506 crate::expressions::SubstringFunc {
22507 this: cast_expr,
22508 start: Expression::number(1),
22509 length: Some(Expression::number(10)),
22510 from_for_syntax: false,
22511 },
22512 )))
22513 } else {
22514 Ok(e)
22515 }
22516 }
22517
22518 Action::DateStrToDateConvert => {
22519 // DATE_STR_TO_DATE(x) -> dialect-specific
22520 if let Expression::Function(f) = e {
22521 let arg = f.args.into_iter().next().unwrap();
22522 match target {
22523 DialectType::SQLite => {
22524 // SQLite: just the bare expression (dates are strings)
22525 Ok(arg)
22526 }
22527 _ => Ok(Expression::Cast(Box::new(Cast {
22528 this: arg,
22529 to: DataType::Date,
22530 double_colon_syntax: false,
22531 trailing_comments: Vec::new(),
22532 format: None,
22533 default: None,
22534 }))),
22535 }
22536 } else {
22537 Ok(e)
22538 }
22539 }
22540
22541 Action::TimeStrToDateConvert => {
22542 // TIME_STR_TO_DATE(x) -> dialect-specific
22543 if let Expression::Function(f) = e {
22544 let arg = f.args.into_iter().next().unwrap();
22545 match target {
22546 DialectType::Hive
22547 | DialectType::Doris
22548 | DialectType::StarRocks
22549 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
22550 Function::new("TO_DATE".to_string(), vec![arg]),
22551 ))),
22552 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22553 // Presto: CAST(x AS TIMESTAMP)
22554 Ok(Expression::Cast(Box::new(Cast {
22555 this: arg,
22556 to: DataType::Timestamp {
22557 timezone: false,
22558 precision: None,
22559 },
22560 double_colon_syntax: false,
22561 trailing_comments: Vec::new(),
22562 format: None,
22563 default: None,
22564 })))
22565 }
22566 _ => {
22567 // Default: CAST(x AS DATE)
22568 Ok(Expression::Cast(Box::new(Cast {
22569 this: arg,
22570 to: DataType::Date,
22571 double_colon_syntax: false,
22572 trailing_comments: Vec::new(),
22573 format: None,
22574 default: None,
22575 })))
22576 }
22577 }
22578 } else {
22579 Ok(e)
22580 }
22581 }
22582
Action::TimeStrToTimeConvert => {
    // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type.
    // The optional second argument is a time-zone name; it is only honored
    // when it is a string literal. Several branches also sniff the first
    // argument (when it is a string literal) for sub-second digits to pick
    // a precision.
    // NOTE(review): assumes at least one argument — a bare TIME_STR_TO_TIME()
    // would panic on `remove(0)`; confirm callers always supply one.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let this = args.remove(0);
        // Zone is only recognized as a string literal; any other expression
        // is silently ignored.
        let zone = if !args.is_empty() {
            match &args[0] {
                Expression::Literal(Literal::String(s)) => Some(s.clone()),
                _ => None,
            }
        } else {
            None
        };
        let has_zone = zone.is_some();

        match target {
            DialectType::SQLite => {
                // SQLite: just the bare expression
                Ok(this)
            }
            DialectType::MySQL => {
                if has_zone {
                    // MySQL with zone: TIMESTAMP(x)
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        vec![this],
                    ))))
                } else {
                    // MySQL: CAST(x AS DATETIME) or with precision
                    // Use DataType::Custom to avoid MySQL's transform_cast converting
                    // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
                    // Precision = number of fractional-second digits after the
                    // last '.' in a string-literal argument, if any.
                    let precision =
                        if let Expression::Literal(Literal::String(ref s)) = this {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0 {
                                    Some(digit_count)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        };
                    let type_name = match precision {
                        Some(p) => format!("DATETIME({})", p),
                        None => "DATETIME".to_string(),
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom { name: type_name },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::ClickHouse => {
                if has_zone {
                    // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
                    // We need to strip the timezone offset from the literal if present
                    let clean_this =
                        if let Expression::Literal(Literal::String(ref s)) = this {
                            // Strip timezone offset like "-08:00" or "+00:00"
                            let re_offset = s.rfind(|c: char| c == '+' || c == '-');
                            if let Some(offset_pos) = re_offset {
                                // Positions <= 10 fall inside the "YYYY-MM-DD"
                                // date part, where '-' is a date separator,
                                // not an offset sign.
                                if offset_pos > 10 {
                                    // After the date part
                                    let trimmed = s[..offset_pos].to_string();
                                    Expression::Literal(Literal::String(trimmed))
                                } else {
                                    this.clone()
                                }
                            } else {
                                this.clone()
                            }
                        } else {
                            this.clone()
                        };
                    // safe: has_zone == zone.is_some()
                    let zone_str = zone.unwrap();
                    // Build: CAST(x AS DateTime64(6, 'zone'))
                    let type_name = format!("DateTime64(6, '{}')", zone_str);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: clean_this,
                        to: DataType::Custom { name: type_name },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // ClickHouse without zone: CAST(x AS DateTime64(6))
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DateTime64(6)".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::BigQuery => {
                if has_zone {
                    // BigQuery with zone: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIME".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::Doris => {
                // Doris: CAST(x AS DATETIME)
                // (zone, if any, is ignored for Doris)
                Ok(Expression::Cast(Box::new(Cast {
                    this,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                })))
            }
            DialectType::TSQL | DialectType::Fabric => {
                if has_zone {
                    // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
                    let cast_expr = Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIMEOFFSET".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    }));
                    // NOTE(review): the zone argument itself is discarded here
                    // and 'UTC' is always used — confirm this is intentional.
                    Ok(Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: cast_expr,
                            zone: Expression::Literal(Literal::String(
                                "UTC".to_string(),
                            )),
                        },
                    )))
                } else {
                    // TSQL: CAST(x AS DATETIME2)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::DuckDB => {
                if has_zone {
                    // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // DuckDB: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::PostgreSQL
            | DialectType::Materialize
            | DialectType::RisingWave => {
                if has_zone {
                    // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // PostgreSQL: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::Snowflake => {
                if has_zone {
                    // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // Snowflake: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                if has_zone {
                    // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
                    // Check for precision from sub-second digits
                    // (precision only applied for Trino).
                    let precision =
                        if let Expression::Literal(Literal::String(ref s)) = this {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0
                                    && matches!(target, DialectType::Trino)
                                {
                                    Some(digit_count as u32)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        };
                    let dt = if let Some(prec) = precision {
                        DataType::Timestamp {
                            timezone: true,
                            precision: Some(prec),
                        }
                    } else {
                        DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        }
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: dt,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // Check for sub-second precision for Trino
                    // (same digit-count sniffing as the zoned branch above).
                    let precision =
                        if let Expression::Literal(Literal::String(ref s)) = this {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0
                                    && matches!(target, DialectType::Trino)
                                {
                                    Some(digit_count as u32)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        };
                    let dt = DataType::Timestamp {
                        timezone: false,
                        precision,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: dt,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            DialectType::Redshift => {
                if has_zone {
                    // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // Redshift: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                }
            }
            _ => {
                // Default: CAST(x AS TIMESTAMP)
                // (zone, if any, is ignored here)
                Ok(Expression::Cast(Box::new(Cast {
                    this,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
22987
22988 Action::DateToDateStrConvert => {
22989 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
22990 if let Expression::Function(f) = e {
22991 let arg = f.args.into_iter().next().unwrap();
22992 let str_type = match target {
22993 DialectType::DuckDB => DataType::Text,
22994 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22995 DataType::Custom {
22996 name: "STRING".to_string(),
22997 }
22998 }
22999 DialectType::Presto
23000 | DialectType::Trino
23001 | DialectType::Athena
23002 | DialectType::Drill => DataType::VarChar {
23003 length: None,
23004 parenthesized_length: false,
23005 },
23006 _ => DataType::VarChar {
23007 length: None,
23008 parenthesized_length: false,
23009 },
23010 };
23011 Ok(Expression::Cast(Box::new(Cast {
23012 this: arg,
23013 to: str_type,
23014 double_colon_syntax: false,
23015 trailing_comments: Vec::new(),
23016 format: None,
23017 default: None,
23018 })))
23019 } else {
23020 Ok(e)
23021 }
23022 }
23023
23024 Action::DateToDiConvert => {
23025 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
23026 if let Expression::Function(f) = e {
23027 let arg = f.args.into_iter().next().unwrap();
23028 let inner = match target {
23029 DialectType::DuckDB => {
23030 // STRFTIME(x, '%Y%m%d')
23031 Expression::Function(Box::new(Function::new(
23032 "STRFTIME".to_string(),
23033 vec![arg, Expression::string("%Y%m%d")],
23034 )))
23035 }
23036 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23037 // DATE_FORMAT(x, 'yyyyMMdd')
23038 Expression::Function(Box::new(Function::new(
23039 "DATE_FORMAT".to_string(),
23040 vec![arg, Expression::string("yyyyMMdd")],
23041 )))
23042 }
23043 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23044 // DATE_FORMAT(x, '%Y%m%d')
23045 Expression::Function(Box::new(Function::new(
23046 "DATE_FORMAT".to_string(),
23047 vec![arg, Expression::string("%Y%m%d")],
23048 )))
23049 }
23050 DialectType::Drill => {
23051 // TO_DATE(x, 'yyyyMMdd')
23052 Expression::Function(Box::new(Function::new(
23053 "TO_DATE".to_string(),
23054 vec![arg, Expression::string("yyyyMMdd")],
23055 )))
23056 }
23057 _ => {
23058 // Default: STRFTIME(x, '%Y%m%d')
23059 Expression::Function(Box::new(Function::new(
23060 "STRFTIME".to_string(),
23061 vec![arg, Expression::string("%Y%m%d")],
23062 )))
23063 }
23064 };
23065 // Use INT (not INTEGER) for Presto/Trino
23066 let int_type = match target {
23067 DialectType::Presto
23068 | DialectType::Trino
23069 | DialectType::Athena
23070 | DialectType::TSQL
23071 | DialectType::Fabric
23072 | DialectType::SQLite
23073 | DialectType::Redshift => DataType::Custom {
23074 name: "INT".to_string(),
23075 },
23076 _ => DataType::Int {
23077 length: None,
23078 integer_spelling: false,
23079 },
23080 };
23081 Ok(Expression::Cast(Box::new(Cast {
23082 this: inner,
23083 to: int_type,
23084 double_colon_syntax: false,
23085 trailing_comments: Vec::new(),
23086 format: None,
23087 default: None,
23088 })))
23089 } else {
23090 Ok(e)
23091 }
23092 }
23093
23094 Action::DiToDateConvert => {
23095 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
23096 if let Expression::Function(f) = e {
23097 let arg = f.args.into_iter().next().unwrap();
23098 match target {
23099 DialectType::DuckDB => {
23100 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
23101 let cast_text = Expression::Cast(Box::new(Cast {
23102 this: arg,
23103 to: DataType::Text,
23104 double_colon_syntax: false,
23105 trailing_comments: Vec::new(),
23106 format: None,
23107 default: None,
23108 }));
23109 let strptime = Expression::Function(Box::new(Function::new(
23110 "STRPTIME".to_string(),
23111 vec![cast_text, Expression::string("%Y%m%d")],
23112 )));
23113 Ok(Expression::Cast(Box::new(Cast {
23114 this: strptime,
23115 to: DataType::Date,
23116 double_colon_syntax: false,
23117 trailing_comments: Vec::new(),
23118 format: None,
23119 default: None,
23120 })))
23121 }
23122 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23123 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
23124 let cast_str = Expression::Cast(Box::new(Cast {
23125 this: arg,
23126 to: DataType::Custom {
23127 name: "STRING".to_string(),
23128 },
23129 double_colon_syntax: false,
23130 trailing_comments: Vec::new(),
23131 format: None,
23132 default: None,
23133 }));
23134 Ok(Expression::Function(Box::new(Function::new(
23135 "TO_DATE".to_string(),
23136 vec![cast_str, Expression::string("yyyyMMdd")],
23137 ))))
23138 }
23139 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23140 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
23141 let cast_varchar = Expression::Cast(Box::new(Cast {
23142 this: arg,
23143 to: DataType::VarChar {
23144 length: None,
23145 parenthesized_length: false,
23146 },
23147 double_colon_syntax: false,
23148 trailing_comments: Vec::new(),
23149 format: None,
23150 default: None,
23151 }));
23152 let date_parse = Expression::Function(Box::new(Function::new(
23153 "DATE_PARSE".to_string(),
23154 vec![cast_varchar, Expression::string("%Y%m%d")],
23155 )));
23156 Ok(Expression::Cast(Box::new(Cast {
23157 this: date_parse,
23158 to: DataType::Date,
23159 double_colon_syntax: false,
23160 trailing_comments: Vec::new(),
23161 format: None,
23162 default: None,
23163 })))
23164 }
23165 DialectType::Drill => {
23166 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
23167 let cast_varchar = Expression::Cast(Box::new(Cast {
23168 this: arg,
23169 to: DataType::VarChar {
23170 length: None,
23171 parenthesized_length: false,
23172 },
23173 double_colon_syntax: false,
23174 trailing_comments: Vec::new(),
23175 format: None,
23176 default: None,
23177 }));
23178 Ok(Expression::Function(Box::new(Function::new(
23179 "TO_DATE".to_string(),
23180 vec![cast_varchar, Expression::string("yyyyMMdd")],
23181 ))))
23182 }
23183 _ => Ok(Expression::Function(Box::new(Function::new(
23184 "DI_TO_DATE".to_string(),
23185 vec![arg],
23186 )))),
23187 }
23188 } else {
23189 Ok(e)
23190 }
23191 }
23192
23193 Action::TsOrDiToDiConvert => {
23194 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23195 if let Expression::Function(f) = e {
23196 let arg = f.args.into_iter().next().unwrap();
23197 let str_type = match target {
23198 DialectType::DuckDB => DataType::Text,
23199 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23200 DataType::Custom {
23201 name: "STRING".to_string(),
23202 }
23203 }
23204 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23205 DataType::VarChar {
23206 length: None,
23207 parenthesized_length: false,
23208 }
23209 }
23210 _ => DataType::VarChar {
23211 length: None,
23212 parenthesized_length: false,
23213 },
23214 };
23215 let cast_str = Expression::Cast(Box::new(Cast {
23216 this: arg,
23217 to: str_type,
23218 double_colon_syntax: false,
23219 trailing_comments: Vec::new(),
23220 format: None,
23221 default: None,
23222 }));
23223 let replace_expr = Expression::Function(Box::new(Function::new(
23224 "REPLACE".to_string(),
23225 vec![cast_str, Expression::string("-"), Expression::string("")],
23226 )));
23227 let substr_name = match target {
23228 DialectType::DuckDB
23229 | DialectType::Hive
23230 | DialectType::Spark
23231 | DialectType::Databricks => "SUBSTR",
23232 _ => "SUBSTR",
23233 };
23234 let substr = Expression::Function(Box::new(Function::new(
23235 substr_name.to_string(),
23236 vec![replace_expr, Expression::number(1), Expression::number(8)],
23237 )));
23238 // Use INT (not INTEGER) for Presto/Trino etc.
23239 let int_type = match target {
23240 DialectType::Presto
23241 | DialectType::Trino
23242 | DialectType::Athena
23243 | DialectType::TSQL
23244 | DialectType::Fabric
23245 | DialectType::SQLite
23246 | DialectType::Redshift => DataType::Custom {
23247 name: "INT".to_string(),
23248 },
23249 _ => DataType::Int {
23250 length: None,
23251 integer_spelling: false,
23252 },
23253 };
23254 Ok(Expression::Cast(Box::new(Cast {
23255 this: substr,
23256 to: int_type,
23257 double_colon_syntax: false,
23258 trailing_comments: Vec::new(),
23259 format: None,
23260 default: None,
23261 })))
23262 } else {
23263 Ok(e)
23264 }
23265 }
23266
23267 Action::UnixToStrConvert => {
23268 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
23269 if let Expression::Function(f) = e {
23270 let mut args = f.args;
23271 let this = args.remove(0);
23272 let fmt_expr = if !args.is_empty() {
23273 Some(args.remove(0))
23274 } else {
23275 None
23276 };
23277
23278 // Check if format is a string literal
23279 let fmt_str = fmt_expr.as_ref().and_then(|f| {
23280 if let Expression::Literal(Literal::String(s)) = f {
23281 Some(s.clone())
23282 } else {
23283 None
23284 }
23285 });
23286
23287 if let Some(fmt_string) = fmt_str {
23288 // String literal format -> use UnixToStr expression (generator handles it)
23289 Ok(Expression::UnixToStr(Box::new(
23290 crate::expressions::UnixToStr {
23291 this: Box::new(this),
23292 format: Some(fmt_string),
23293 },
23294 )))
23295 } else if let Some(fmt_e) = fmt_expr {
23296 // Non-literal format (e.g., identifier `y`) -> build target expression directly
23297 match target {
23298 DialectType::DuckDB => {
23299 // STRFTIME(TO_TIMESTAMP(x), y)
23300 let to_ts = Expression::Function(Box::new(Function::new(
23301 "TO_TIMESTAMP".to_string(),
23302 vec![this],
23303 )));
23304 Ok(Expression::Function(Box::new(Function::new(
23305 "STRFTIME".to_string(),
23306 vec![to_ts, fmt_e],
23307 ))))
23308 }
23309 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23310 // DATE_FORMAT(FROM_UNIXTIME(x), y)
23311 let from_unix = Expression::Function(Box::new(Function::new(
23312 "FROM_UNIXTIME".to_string(),
23313 vec![this],
23314 )));
23315 Ok(Expression::Function(Box::new(Function::new(
23316 "DATE_FORMAT".to_string(),
23317 vec![from_unix, fmt_e],
23318 ))))
23319 }
23320 DialectType::Hive
23321 | DialectType::Spark
23322 | DialectType::Databricks
23323 | DialectType::Doris
23324 | DialectType::StarRocks => {
23325 // FROM_UNIXTIME(x, y)
23326 Ok(Expression::Function(Box::new(Function::new(
23327 "FROM_UNIXTIME".to_string(),
23328 vec![this, fmt_e],
23329 ))))
23330 }
23331 _ => {
23332 // Default: keep as UNIX_TO_STR(x, y)
23333 Ok(Expression::Function(Box::new(Function::new(
23334 "UNIX_TO_STR".to_string(),
23335 vec![this, fmt_e],
23336 ))))
23337 }
23338 }
23339 } else {
23340 Ok(Expression::UnixToStr(Box::new(
23341 crate::expressions::UnixToStr {
23342 this: Box::new(this),
23343 format: None,
23344 },
23345 )))
23346 }
23347 } else {
23348 Ok(e)
23349 }
23350 }
23351
23352 Action::UnixToTimeConvert => {
23353 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
23354 if let Expression::Function(f) = e {
23355 let arg = f.args.into_iter().next().unwrap();
23356 Ok(Expression::UnixToTime(Box::new(
23357 crate::expressions::UnixToTime {
23358 this: Box::new(arg),
23359 scale: None,
23360 zone: None,
23361 hours: None,
23362 minutes: None,
23363 format: None,
23364 target_type: None,
23365 },
23366 )))
23367 } else {
23368 Ok(e)
23369 }
23370 }
23371
23372 Action::UnixToTimeStrConvert => {
23373 // UNIX_TO_TIME_STR(x) -> dialect-specific
23374 if let Expression::Function(f) = e {
23375 let arg = f.args.into_iter().next().unwrap();
23376 match target {
23377 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23378 // FROM_UNIXTIME(x)
23379 Ok(Expression::Function(Box::new(Function::new(
23380 "FROM_UNIXTIME".to_string(),
23381 vec![arg],
23382 ))))
23383 }
23384 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23385 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
23386 let from_unix = Expression::Function(Box::new(Function::new(
23387 "FROM_UNIXTIME".to_string(),
23388 vec![arg],
23389 )));
23390 Ok(Expression::Cast(Box::new(Cast {
23391 this: from_unix,
23392 to: DataType::VarChar {
23393 length: None,
23394 parenthesized_length: false,
23395 },
23396 double_colon_syntax: false,
23397 trailing_comments: Vec::new(),
23398 format: None,
23399 default: None,
23400 })))
23401 }
23402 DialectType::DuckDB => {
23403 // CAST(TO_TIMESTAMP(x) AS TEXT)
23404 let to_ts = Expression::Function(Box::new(Function::new(
23405 "TO_TIMESTAMP".to_string(),
23406 vec![arg],
23407 )));
23408 Ok(Expression::Cast(Box::new(Cast {
23409 this: to_ts,
23410 to: DataType::Text,
23411 double_colon_syntax: false,
23412 trailing_comments: Vec::new(),
23413 format: None,
23414 default: None,
23415 })))
23416 }
23417 _ => Ok(Expression::Function(Box::new(Function::new(
23418 "UNIX_TO_TIME_STR".to_string(),
23419 vec![arg],
23420 )))),
23421 }
23422 } else {
23423 Ok(e)
23424 }
23425 }
23426
23427 Action::TimeToUnixConvert => {
23428 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
23429 if let Expression::Function(f) = e {
23430 let arg = f.args.into_iter().next().unwrap();
23431 Ok(Expression::TimeToUnix(Box::new(
23432 crate::expressions::UnaryFunc {
23433 this: arg,
23434 original_name: None,
23435 },
23436 )))
23437 } else {
23438 Ok(e)
23439 }
23440 }
23441
23442 Action::TimeToStrConvert => {
23443 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
23444 if let Expression::Function(f) = e {
23445 let mut args = f.args;
23446 let this = args.remove(0);
23447 let fmt = match args.remove(0) {
23448 Expression::Literal(Literal::String(s)) => s,
23449 other => {
23450 return Ok(Expression::Function(Box::new(Function::new(
23451 "TIME_TO_STR".to_string(),
23452 vec![this, other],
23453 ))));
23454 }
23455 };
23456 Ok(Expression::TimeToStr(Box::new(
23457 crate::expressions::TimeToStr {
23458 this: Box::new(this),
23459 format: fmt,
23460 culture: None,
23461 zone: None,
23462 },
23463 )))
23464 } else {
23465 Ok(e)
23466 }
23467 }
23468
23469 Action::StrToUnixConvert => {
23470 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
23471 if let Expression::Function(f) = e {
23472 let mut args = f.args;
23473 let this = args.remove(0);
23474 let fmt = match args.remove(0) {
23475 Expression::Literal(Literal::String(s)) => s,
23476 other => {
23477 return Ok(Expression::Function(Box::new(Function::new(
23478 "STR_TO_UNIX".to_string(),
23479 vec![this, other],
23480 ))));
23481 }
23482 };
23483 Ok(Expression::StrToUnix(Box::new(
23484 crate::expressions::StrToUnix {
23485 this: Some(Box::new(this)),
23486 format: Some(fmt),
23487 },
23488 )))
23489 } else {
23490 Ok(e)
23491 }
23492 }
23493
23494 Action::TimeStrToUnixConvert => {
23495 // TIME_STR_TO_UNIX(x) -> dialect-specific
23496 if let Expression::Function(f) = e {
23497 let arg = f.args.into_iter().next().unwrap();
23498 match target {
23499 DialectType::DuckDB => {
23500 // EPOCH(CAST(x AS TIMESTAMP))
23501 let cast_ts = Expression::Cast(Box::new(Cast {
23502 this: arg,
23503 to: DataType::Timestamp {
23504 timezone: false,
23505 precision: None,
23506 },
23507 double_colon_syntax: false,
23508 trailing_comments: Vec::new(),
23509 format: None,
23510 default: None,
23511 }));
23512 Ok(Expression::Function(Box::new(Function::new(
23513 "EPOCH".to_string(),
23514 vec![cast_ts],
23515 ))))
23516 }
23517 DialectType::Hive
23518 | DialectType::Doris
23519 | DialectType::StarRocks
23520 | DialectType::MySQL => {
23521 // UNIX_TIMESTAMP(x)
23522 Ok(Expression::Function(Box::new(Function::new(
23523 "UNIX_TIMESTAMP".to_string(),
23524 vec![arg],
23525 ))))
23526 }
23527 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23528 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
23529 let date_parse = Expression::Function(Box::new(Function::new(
23530 "DATE_PARSE".to_string(),
23531 vec![arg, Expression::string("%Y-%m-%d %T")],
23532 )));
23533 Ok(Expression::Function(Box::new(Function::new(
23534 "TO_UNIXTIME".to_string(),
23535 vec![date_parse],
23536 ))))
23537 }
23538 _ => Ok(Expression::Function(Box::new(Function::new(
23539 "TIME_STR_TO_UNIX".to_string(),
23540 vec![arg],
23541 )))),
23542 }
23543 } else {
23544 Ok(e)
23545 }
23546 }
23547
23548 Action::TimeToTimeStrConvert => {
23549 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
23550 if let Expression::Function(f) = e {
23551 let arg = f.args.into_iter().next().unwrap();
23552 let str_type = match target {
23553 DialectType::DuckDB => DataType::Text,
23554 DialectType::Hive
23555 | DialectType::Spark
23556 | DialectType::Databricks
23557 | DialectType::Doris
23558 | DialectType::StarRocks => DataType::Custom {
23559 name: "STRING".to_string(),
23560 },
23561 DialectType::Redshift => DataType::Custom {
23562 name: "VARCHAR(MAX)".to_string(),
23563 },
23564 _ => DataType::VarChar {
23565 length: None,
23566 parenthesized_length: false,
23567 },
23568 };
23569 Ok(Expression::Cast(Box::new(Cast {
23570 this: arg,
23571 to: str_type,
23572 double_colon_syntax: false,
23573 trailing_comments: Vec::new(),
23574 format: None,
23575 default: None,
23576 })))
23577 } else {
23578 Ok(e)
23579 }
23580 }
23581
23582 Action::DateTruncSwapArgs => {
23583 // DATE_TRUNC('unit', x) from Generic -> target-specific
23584 if let Expression::Function(f) = e {
23585 if f.args.len() == 2 {
23586 let unit_arg = f.args[0].clone();
23587 let expr_arg = f.args[1].clone();
23588 // Extract unit string from the first arg
23589 let unit_str = match &unit_arg {
23590 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
23591 _ => return Ok(Expression::Function(f)),
23592 };
23593 match target {
23594 DialectType::BigQuery => {
23595 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
23596 let unit_ident =
23597 Expression::Column(crate::expressions::Column {
23598 name: crate::expressions::Identifier::new(unit_str),
23599 table: None,
23600 join_mark: false,
23601 trailing_comments: Vec::new(),
23602 });
23603 Ok(Expression::Function(Box::new(Function::new(
23604 "DATE_TRUNC".to_string(),
23605 vec![expr_arg, unit_ident],
23606 ))))
23607 }
23608 DialectType::Doris => {
23609 // Doris: DATE_TRUNC(x, 'UNIT')
23610 Ok(Expression::Function(Box::new(Function::new(
23611 "DATE_TRUNC".to_string(),
23612 vec![expr_arg, Expression::string(&unit_str)],
23613 ))))
23614 }
23615 DialectType::StarRocks => {
23616 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
23617 Ok(Expression::Function(Box::new(Function::new(
23618 "DATE_TRUNC".to_string(),
23619 vec![Expression::string(&unit_str), expr_arg],
23620 ))))
23621 }
23622 DialectType::Spark | DialectType::Databricks => {
23623 // Spark: TRUNC(x, 'UNIT')
23624 Ok(Expression::Function(Box::new(Function::new(
23625 "TRUNC".to_string(),
23626 vec![expr_arg, Expression::string(&unit_str)],
23627 ))))
23628 }
23629 DialectType::MySQL => {
23630 // MySQL: complex expansion based on unit
23631 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
23632 }
23633 _ => Ok(Expression::Function(f)),
23634 }
23635 } else {
23636 Ok(Expression::Function(f))
23637 }
23638 } else {
23639 Ok(e)
23640 }
23641 }
23642
23643 Action::TimestampTruncConvert => {
23644 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
23645 if let Expression::Function(f) = e {
23646 if f.args.len() >= 2 {
23647 let expr_arg = f.args[0].clone();
23648 let unit_arg = f.args[1].clone();
23649 let tz_arg = if f.args.len() >= 3 {
23650 Some(f.args[2].clone())
23651 } else {
23652 None
23653 };
23654 // Extract unit string
23655 let unit_str = match &unit_arg {
23656 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
23657 Expression::Column(c) => c.name.name.to_uppercase(),
23658 _ => {
23659 return Ok(Expression::Function(f));
23660 }
23661 };
23662 match target {
23663 DialectType::Spark | DialectType::Databricks => {
23664 // Spark: DATE_TRUNC('UNIT', x)
23665 Ok(Expression::Function(Box::new(Function::new(
23666 "DATE_TRUNC".to_string(),
23667 vec![Expression::string(&unit_str), expr_arg],
23668 ))))
23669 }
23670 DialectType::Doris | DialectType::StarRocks => {
23671 // Doris: DATE_TRUNC(x, 'UNIT')
23672 Ok(Expression::Function(Box::new(Function::new(
23673 "DATE_TRUNC".to_string(),
23674 vec![expr_arg, Expression::string(&unit_str)],
23675 ))))
23676 }
23677 DialectType::BigQuery => {
23678 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
23679 let unit_ident =
23680 Expression::Column(crate::expressions::Column {
23681 name: crate::expressions::Identifier::new(unit_str),
23682 table: None,
23683 join_mark: false,
23684 trailing_comments: Vec::new(),
23685 });
23686 let mut args = vec![expr_arg, unit_ident];
23687 if let Some(tz) = tz_arg {
23688 args.push(tz);
23689 }
23690 Ok(Expression::Function(Box::new(Function::new(
23691 "TIMESTAMP_TRUNC".to_string(),
23692 args,
23693 ))))
23694 }
23695 DialectType::DuckDB => {
23696 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
23697 if let Some(tz) = tz_arg {
23698 let tz_str = match &tz {
23699 Expression::Literal(Literal::String(s)) => s.clone(),
23700 _ => "UTC".to_string(),
23701 };
23702 // x AT TIME ZONE 'tz'
23703 let at_tz = Expression::AtTimeZone(Box::new(
23704 crate::expressions::AtTimeZone {
23705 this: expr_arg,
23706 zone: Expression::string(&tz_str),
23707 },
23708 ));
23709 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
23710 let trunc = Expression::Function(Box::new(Function::new(
23711 "DATE_TRUNC".to_string(),
23712 vec![Expression::string(&unit_str), at_tz],
23713 )));
23714 // DATE_TRUNC(...) AT TIME ZONE 'tz'
23715 Ok(Expression::AtTimeZone(Box::new(
23716 crate::expressions::AtTimeZone {
23717 this: trunc,
23718 zone: Expression::string(&tz_str),
23719 },
23720 )))
23721 } else {
23722 Ok(Expression::Function(Box::new(Function::new(
23723 "DATE_TRUNC".to_string(),
23724 vec![Expression::string(&unit_str), expr_arg],
23725 ))))
23726 }
23727 }
23728 DialectType::Presto
23729 | DialectType::Trino
23730 | DialectType::Athena
23731 | DialectType::Snowflake => {
23732 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
23733 Ok(Expression::Function(Box::new(Function::new(
23734 "DATE_TRUNC".to_string(),
23735 vec![Expression::string(&unit_str), expr_arg],
23736 ))))
23737 }
23738 _ => {
23739 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
23740 let mut args = vec![Expression::string(&unit_str), expr_arg];
23741 if let Some(tz) = tz_arg {
23742 args.push(tz);
23743 }
23744 Ok(Expression::Function(Box::new(Function::new(
23745 "DATE_TRUNC".to_string(),
23746 args,
23747 ))))
23748 }
23749 }
23750 } else {
23751 Ok(Expression::Function(f))
23752 }
23753 } else {
23754 Ok(e)
23755 }
23756 }
23757
23758 Action::StrToDateConvert => {
23759 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
23760 if let Expression::Function(f) = e {
23761 if f.args.len() == 2 {
23762 let mut args = f.args;
23763 let this = args.remove(0);
23764 let fmt_expr = args.remove(0);
23765 let fmt_str = match &fmt_expr {
23766 Expression::Literal(Literal::String(s)) => Some(s.clone()),
23767 _ => None,
23768 };
23769 let default_date = "%Y-%m-%d";
23770 let default_time = "%Y-%m-%d %H:%M:%S";
23771 let is_default = fmt_str
23772 .as_ref()
23773 .map_or(false, |f| f == default_date || f == default_time);
23774
23775 if is_default {
23776 // Default format: handle per-dialect
23777 match target {
23778 DialectType::MySQL
23779 | DialectType::Doris
23780 | DialectType::StarRocks => {
23781 // Keep STR_TO_DATE(x, fmt) as-is
23782 Ok(Expression::Function(Box::new(Function::new(
23783 "STR_TO_DATE".to_string(),
23784 vec![this, fmt_expr],
23785 ))))
23786 }
23787 DialectType::Hive => {
23788 // Hive: CAST(x AS DATE)
23789 Ok(Expression::Cast(Box::new(Cast {
23790 this,
23791 to: DataType::Date,
23792 double_colon_syntax: false,
23793 trailing_comments: Vec::new(),
23794 format: None,
23795 default: None,
23796 })))
23797 }
23798 DialectType::Presto
23799 | DialectType::Trino
23800 | DialectType::Athena => {
23801 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
23802 let date_parse =
23803 Expression::Function(Box::new(Function::new(
23804 "DATE_PARSE".to_string(),
23805 vec![this, fmt_expr],
23806 )));
23807 Ok(Expression::Cast(Box::new(Cast {
23808 this: date_parse,
23809 to: DataType::Date,
23810 double_colon_syntax: false,
23811 trailing_comments: Vec::new(),
23812 format: None,
23813 default: None,
23814 })))
23815 }
23816 _ => {
23817 // Others: TsOrDsToDate (delegates to generator)
23818 Ok(Expression::TsOrDsToDate(Box::new(
23819 crate::expressions::TsOrDsToDate {
23820 this: Box::new(this),
23821 format: None,
23822 safe: None,
23823 },
23824 )))
23825 }
23826 }
23827 } else if let Some(fmt) = fmt_str {
23828 match target {
23829 DialectType::Doris
23830 | DialectType::StarRocks
23831 | DialectType::MySQL => {
23832 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
23833 let mut normalized = fmt.clone();
23834 normalized = normalized.replace("%-d", "%e");
23835 normalized = normalized.replace("%-m", "%c");
23836 normalized = normalized.replace("%H:%M:%S", "%T");
23837 Ok(Expression::Function(Box::new(Function::new(
23838 "STR_TO_DATE".to_string(),
23839 vec![this, Expression::string(&normalized)],
23840 ))))
23841 }
23842 DialectType::Hive => {
23843 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
23844 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
23845 let unix_ts =
23846 Expression::Function(Box::new(Function::new(
23847 "UNIX_TIMESTAMP".to_string(),
23848 vec![this, Expression::string(&java_fmt)],
23849 )));
23850 let from_unix =
23851 Expression::Function(Box::new(Function::new(
23852 "FROM_UNIXTIME".to_string(),
23853 vec![unix_ts],
23854 )));
23855 Ok(Expression::Cast(Box::new(Cast {
23856 this: from_unix,
23857 to: DataType::Date,
23858 double_colon_syntax: false,
23859 trailing_comments: Vec::new(),
23860 format: None,
23861 default: None,
23862 })))
23863 }
23864 DialectType::Spark | DialectType::Databricks => {
23865 // Spark: TO_DATE(x, java_fmt)
23866 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
23867 Ok(Expression::Function(Box::new(Function::new(
23868 "TO_DATE".to_string(),
23869 vec![this, Expression::string(&java_fmt)],
23870 ))))
23871 }
23872 DialectType::Drill => {
23873 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
23874 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
23875 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
23876 let java_fmt = java_fmt.replace('T', "'T'");
23877 Ok(Expression::Function(Box::new(Function::new(
23878 "TO_DATE".to_string(),
23879 vec![this, Expression::string(&java_fmt)],
23880 ))))
23881 }
23882 _ => {
23883 // For other dialects: use TsOrDsToDate which delegates to generator
23884 Ok(Expression::TsOrDsToDate(Box::new(
23885 crate::expressions::TsOrDsToDate {
23886 this: Box::new(this),
23887 format: Some(fmt),
23888 safe: None,
23889 },
23890 )))
23891 }
23892 }
23893 } else {
23894 // Non-string format - keep as-is
23895 let mut new_args = Vec::new();
23896 new_args.push(this);
23897 new_args.push(fmt_expr);
23898 Ok(Expression::Function(Box::new(Function::new(
23899 "STR_TO_DATE".to_string(),
23900 new_args,
23901 ))))
23902 }
23903 } else {
23904 Ok(Expression::Function(f))
23905 }
23906 } else {
23907 Ok(e)
23908 }
23909 }
23910
23911 Action::TsOrDsAddConvert => {
23912 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
23913 if let Expression::Function(f) = e {
23914 if f.args.len() == 3 {
23915 let mut args = f.args;
23916 let x = args.remove(0);
23917 let n = args.remove(0);
23918 let unit_expr = args.remove(0);
23919 let unit_str = match &unit_expr {
23920 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
23921 _ => "DAY".to_string(),
23922 };
23923
23924 match target {
23925 DialectType::Hive
23926 | DialectType::Spark
23927 | DialectType::Databricks => {
23928 // DATE_ADD(x, n) - only supports DAY unit
23929 Ok(Expression::Function(Box::new(Function::new(
23930 "DATE_ADD".to_string(),
23931 vec![x, n],
23932 ))))
23933 }
23934 DialectType::MySQL => {
23935 // DATE_ADD(x, INTERVAL n UNIT)
23936 let iu = match unit_str.to_uppercase().as_str() {
23937 "YEAR" => crate::expressions::IntervalUnit::Year,
23938 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
23939 "MONTH" => crate::expressions::IntervalUnit::Month,
23940 "WEEK" => crate::expressions::IntervalUnit::Week,
23941 "HOUR" => crate::expressions::IntervalUnit::Hour,
23942 "MINUTE" => crate::expressions::IntervalUnit::Minute,
23943 "SECOND" => crate::expressions::IntervalUnit::Second,
23944 _ => crate::expressions::IntervalUnit::Day,
23945 };
23946 let interval = Expression::Interval(Box::new(
23947 crate::expressions::Interval {
23948 this: Some(n),
23949 unit: Some(
23950 crate::expressions::IntervalUnitSpec::Simple {
23951 unit: iu,
23952 use_plural: false,
23953 },
23954 ),
23955 },
23956 ));
23957 Ok(Expression::Function(Box::new(Function::new(
23958 "DATE_ADD".to_string(),
23959 vec![x, interval],
23960 ))))
23961 }
23962 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23963 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
23964 let cast_ts = Expression::Cast(Box::new(Cast {
23965 this: x,
23966 to: DataType::Timestamp {
23967 precision: None,
23968 timezone: false,
23969 },
23970 double_colon_syntax: false,
23971 trailing_comments: Vec::new(),
23972 format: None,
23973 default: None,
23974 }));
23975 let cast_date = Expression::Cast(Box::new(Cast {
23976 this: cast_ts,
23977 to: DataType::Date,
23978 double_colon_syntax: false,
23979 trailing_comments: Vec::new(),
23980 format: None,
23981 default: None,
23982 }));
23983 Ok(Expression::Function(Box::new(Function::new(
23984 "DATE_ADD".to_string(),
23985 vec![Expression::string(&unit_str), n, cast_date],
23986 ))))
23987 }
23988 DialectType::DuckDB => {
23989 // CAST(x AS DATE) + INTERVAL n UNIT
23990 let cast_date = Expression::Cast(Box::new(Cast {
23991 this: x,
23992 to: DataType::Date,
23993 double_colon_syntax: false,
23994 trailing_comments: Vec::new(),
23995 format: None,
23996 default: None,
23997 }));
23998 let iu = match unit_str.to_uppercase().as_str() {
23999 "YEAR" => crate::expressions::IntervalUnit::Year,
24000 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24001 "MONTH" => crate::expressions::IntervalUnit::Month,
24002 "WEEK" => crate::expressions::IntervalUnit::Week,
24003 "HOUR" => crate::expressions::IntervalUnit::Hour,
24004 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24005 "SECOND" => crate::expressions::IntervalUnit::Second,
24006 _ => crate::expressions::IntervalUnit::Day,
24007 };
24008 let interval = Expression::Interval(Box::new(
24009 crate::expressions::Interval {
24010 this: Some(n),
24011 unit: Some(
24012 crate::expressions::IntervalUnitSpec::Simple {
24013 unit: iu,
24014 use_plural: false,
24015 },
24016 ),
24017 },
24018 ));
24019 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24020 left: cast_date,
24021 right: interval,
24022 left_comments: Vec::new(),
24023 operator_comments: Vec::new(),
24024 trailing_comments: Vec::new(),
24025 })))
24026 }
24027 DialectType::Drill => {
24028 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24029 let cast_date = Expression::Cast(Box::new(Cast {
24030 this: x,
24031 to: DataType::Date,
24032 double_colon_syntax: false,
24033 trailing_comments: Vec::new(),
24034 format: None,
24035 default: None,
24036 }));
24037 let iu = match unit_str.to_uppercase().as_str() {
24038 "YEAR" => crate::expressions::IntervalUnit::Year,
24039 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24040 "MONTH" => crate::expressions::IntervalUnit::Month,
24041 "WEEK" => crate::expressions::IntervalUnit::Week,
24042 "HOUR" => crate::expressions::IntervalUnit::Hour,
24043 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24044 "SECOND" => crate::expressions::IntervalUnit::Second,
24045 _ => crate::expressions::IntervalUnit::Day,
24046 };
24047 let interval = Expression::Interval(Box::new(
24048 crate::expressions::Interval {
24049 this: Some(n),
24050 unit: Some(
24051 crate::expressions::IntervalUnitSpec::Simple {
24052 unit: iu,
24053 use_plural: false,
24054 },
24055 ),
24056 },
24057 ));
24058 Ok(Expression::Function(Box::new(Function::new(
24059 "DATE_ADD".to_string(),
24060 vec![cast_date, interval],
24061 ))))
24062 }
24063 _ => {
24064 // Default: keep as TS_OR_DS_ADD
24065 Ok(Expression::Function(Box::new(Function::new(
24066 "TS_OR_DS_ADD".to_string(),
24067 vec![x, n, unit_expr],
24068 ))))
24069 }
24070 }
24071 } else {
24072 Ok(Expression::Function(f))
24073 }
24074 } else {
24075 Ok(e)
24076 }
24077 }
24078
24079 Action::DateFromUnixDateConvert => {
24080 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24081 if let Expression::Function(f) = e {
24082 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24083 if matches!(
24084 target,
24085 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24086 ) {
24087 return Ok(Expression::Function(Box::new(Function::new(
24088 "DATE_FROM_UNIX_DATE".to_string(),
24089 f.args,
24090 ))));
24091 }
24092 let n = f.args.into_iter().next().unwrap();
24093 let epoch_date = Expression::Cast(Box::new(Cast {
24094 this: Expression::string("1970-01-01"),
24095 to: DataType::Date,
24096 double_colon_syntax: false,
24097 trailing_comments: Vec::new(),
24098 format: None,
24099 default: None,
24100 }));
24101 match target {
24102 DialectType::DuckDB => {
24103 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24104 let interval =
24105 Expression::Interval(Box::new(crate::expressions::Interval {
24106 this: Some(n),
24107 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24108 unit: crate::expressions::IntervalUnit::Day,
24109 use_plural: false,
24110 }),
24111 }));
24112 Ok(Expression::Add(Box::new(
24113 crate::expressions::BinaryOp::new(epoch_date, interval),
24114 )))
24115 }
24116 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24117 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24118 Ok(Expression::Function(Box::new(Function::new(
24119 "DATE_ADD".to_string(),
24120 vec![Expression::string("DAY"), n, epoch_date],
24121 ))))
24122 }
24123 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24124 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24125 Ok(Expression::Function(Box::new(Function::new(
24126 "DATEADD".to_string(),
24127 vec![
24128 Expression::Identifier(Identifier::new("DAY")),
24129 n,
24130 epoch_date,
24131 ],
24132 ))))
24133 }
24134 DialectType::BigQuery => {
24135 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24136 let interval =
24137 Expression::Interval(Box::new(crate::expressions::Interval {
24138 this: Some(n),
24139 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24140 unit: crate::expressions::IntervalUnit::Day,
24141 use_plural: false,
24142 }),
24143 }));
24144 Ok(Expression::Function(Box::new(Function::new(
24145 "DATE_ADD".to_string(),
24146 vec![epoch_date, interval],
24147 ))))
24148 }
24149 DialectType::MySQL
24150 | DialectType::Doris
24151 | DialectType::StarRocks
24152 | DialectType::Drill => {
24153 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24154 let interval =
24155 Expression::Interval(Box::new(crate::expressions::Interval {
24156 this: Some(n),
24157 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24158 unit: crate::expressions::IntervalUnit::Day,
24159 use_plural: false,
24160 }),
24161 }));
24162 Ok(Expression::Function(Box::new(Function::new(
24163 "DATE_ADD".to_string(),
24164 vec![epoch_date, interval],
24165 ))))
24166 }
24167 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24168 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24169 Ok(Expression::Function(Box::new(Function::new(
24170 "DATE_ADD".to_string(),
24171 vec![epoch_date, n],
24172 ))))
24173 }
24174 DialectType::PostgreSQL
24175 | DialectType::Materialize
24176 | DialectType::RisingWave => {
24177 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24178 let n_str = match &n {
24179 Expression::Literal(Literal::Number(s)) => s.clone(),
24180 _ => Self::expr_to_string_static(&n),
24181 };
24182 let interval =
24183 Expression::Interval(Box::new(crate::expressions::Interval {
24184 this: Some(Expression::string(&format!("{} DAY", n_str))),
24185 unit: None,
24186 }));
24187 Ok(Expression::Add(Box::new(
24188 crate::expressions::BinaryOp::new(epoch_date, interval),
24189 )))
24190 }
24191 _ => {
24192 // Default: keep as-is
24193 Ok(Expression::Function(Box::new(Function::new(
24194 "DATE_FROM_UNIX_DATE".to_string(),
24195 vec![n],
24196 ))))
24197 }
24198 }
24199 } else {
24200 Ok(e)
24201 }
24202 }
24203
24204 Action::ArrayRemoveConvert => {
24205 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24206 if let Expression::ArrayRemove(bf) = e {
24207 let arr = bf.this;
24208 let target_val = bf.expression;
24209 match target {
24210 DialectType::DuckDB => {
24211 let u_id = crate::expressions::Identifier::new("_u");
24212 let lambda =
24213 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24214 parameters: vec![u_id.clone()],
24215 body: Expression::Neq(Box::new(BinaryOp {
24216 left: Expression::Identifier(u_id),
24217 right: target_val,
24218 left_comments: Vec::new(),
24219 operator_comments: Vec::new(),
24220 trailing_comments: Vec::new(),
24221 })),
24222 colon: false,
24223 parameter_types: Vec::new(),
24224 }));
24225 Ok(Expression::Function(Box::new(Function::new(
24226 "LIST_FILTER".to_string(),
24227 vec![arr, lambda],
24228 ))))
24229 }
24230 DialectType::ClickHouse => {
24231 let u_id = crate::expressions::Identifier::new("_u");
24232 let lambda =
24233 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24234 parameters: vec![u_id.clone()],
24235 body: Expression::Neq(Box::new(BinaryOp {
24236 left: Expression::Identifier(u_id),
24237 right: target_val,
24238 left_comments: Vec::new(),
24239 operator_comments: Vec::new(),
24240 trailing_comments: Vec::new(),
24241 })),
24242 colon: false,
24243 parameter_types: Vec::new(),
24244 }));
24245 Ok(Expression::Function(Box::new(Function::new(
24246 "arrayFilter".to_string(),
24247 vec![lambda, arr],
24248 ))))
24249 }
24250 DialectType::BigQuery => {
24251 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24252 let u_id = crate::expressions::Identifier::new("_u");
24253 let u_col = Expression::Column(crate::expressions::Column {
24254 name: u_id.clone(),
24255 table: None,
24256 join_mark: false,
24257 trailing_comments: Vec::new(),
24258 });
24259 let unnest_expr =
24260 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24261 this: arr,
24262 expressions: Vec::new(),
24263 with_ordinality: false,
24264 alias: None,
24265 offset_alias: None,
24266 }));
24267 let aliased_unnest =
24268 Expression::Alias(Box::new(crate::expressions::Alias {
24269 this: unnest_expr,
24270 alias: u_id.clone(),
24271 column_aliases: Vec::new(),
24272 pre_alias_comments: Vec::new(),
24273 trailing_comments: Vec::new(),
24274 }));
24275 let where_cond = Expression::Neq(Box::new(BinaryOp {
24276 left: u_col.clone(),
24277 right: target_val,
24278 left_comments: Vec::new(),
24279 operator_comments: Vec::new(),
24280 trailing_comments: Vec::new(),
24281 }));
24282 let subquery = Expression::Select(Box::new(
24283 crate::expressions::Select::new()
24284 .column(u_col)
24285 .from(aliased_unnest)
24286 .where_(where_cond),
24287 ));
24288 Ok(Expression::ArrayFunc(Box::new(
24289 crate::expressions::ArrayConstructor {
24290 expressions: vec![subquery],
24291 bracket_notation: false,
24292 use_list_keyword: false,
24293 },
24294 )))
24295 }
24296 _ => Ok(Expression::ArrayRemove(Box::new(
24297 crate::expressions::BinaryFunc {
24298 original_name: None,
24299 this: arr,
24300 expression: target_val,
24301 },
24302 ))),
24303 }
24304 } else {
24305 Ok(e)
24306 }
24307 }
24308
24309 Action::ArrayReverseConvert => {
24310 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
24311 if let Expression::ArrayReverse(af) = e {
24312 Ok(Expression::Function(Box::new(Function::new(
24313 "arrayReverse".to_string(),
24314 vec![af.this],
24315 ))))
24316 } else {
24317 Ok(e)
24318 }
24319 }
24320
24321 Action::JsonKeysConvert => {
24322 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
24323 if let Expression::JsonKeys(uf) = e {
24324 match target {
24325 DialectType::Spark | DialectType::Databricks => {
24326 Ok(Expression::Function(Box::new(Function::new(
24327 "JSON_OBJECT_KEYS".to_string(),
24328 vec![uf.this],
24329 ))))
24330 }
24331 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24332 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
24333 ))),
24334 _ => Ok(Expression::JsonKeys(uf)),
24335 }
24336 } else {
24337 Ok(e)
24338 }
24339 }
24340
24341 Action::ParseJsonStrip => {
24342 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
24343 if let Expression::ParseJson(uf) = e {
24344 Ok(uf.this)
24345 } else {
24346 Ok(e)
24347 }
24348 }
24349
24350 Action::ArraySizeDrill => {
24351 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
24352 if let Expression::ArraySize(uf) = e {
24353 Ok(Expression::Function(Box::new(Function::new(
24354 "REPEATED_COUNT".to_string(),
24355 vec![uf.this],
24356 ))))
24357 } else {
24358 Ok(e)
24359 }
24360 }
24361
24362 Action::WeekOfYearToWeekIso => {
24363 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
24364 if let Expression::WeekOfYear(uf) = e {
24365 Ok(Expression::Function(Box::new(Function::new(
24366 "WEEKISO".to_string(),
24367 vec![uf.this],
24368 ))))
24369 } else {
24370 Ok(e)
24371 }
24372 }
24373 }
24374 })
24375 }
24376
24377 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
24378 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
24379 use crate::expressions::Function;
24380 match unit {
24381 "DAY" => {
24382 // DATE(x)
24383 Ok(Expression::Function(Box::new(Function::new(
24384 "DATE".to_string(),
24385 vec![expr.clone()],
24386 ))))
24387 }
24388 "WEEK" => {
24389 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
24390 let year_x = Expression::Function(Box::new(Function::new(
24391 "YEAR".to_string(),
24392 vec![expr.clone()],
24393 )));
24394 let week_x = Expression::Function(Box::new(Function::new(
24395 "WEEK".to_string(),
24396 vec![expr.clone(), Expression::number(1)],
24397 )));
24398 let concat_args = vec![
24399 year_x,
24400 Expression::string(" "),
24401 week_x,
24402 Expression::string(" 1"),
24403 ];
24404 let concat = Expression::Function(Box::new(Function::new(
24405 "CONCAT".to_string(),
24406 concat_args,
24407 )));
24408 Ok(Expression::Function(Box::new(Function::new(
24409 "STR_TO_DATE".to_string(),
24410 vec![concat, Expression::string("%Y %u %w")],
24411 ))))
24412 }
24413 "MONTH" => {
24414 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
24415 let year_x = Expression::Function(Box::new(Function::new(
24416 "YEAR".to_string(),
24417 vec![expr.clone()],
24418 )));
24419 let month_x = Expression::Function(Box::new(Function::new(
24420 "MONTH".to_string(),
24421 vec![expr.clone()],
24422 )));
24423 let concat_args = vec![
24424 year_x,
24425 Expression::string(" "),
24426 month_x,
24427 Expression::string(" 1"),
24428 ];
24429 let concat = Expression::Function(Box::new(Function::new(
24430 "CONCAT".to_string(),
24431 concat_args,
24432 )));
24433 Ok(Expression::Function(Box::new(Function::new(
24434 "STR_TO_DATE".to_string(),
24435 vec![concat, Expression::string("%Y %c %e")],
24436 ))))
24437 }
24438 "QUARTER" => {
24439 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
24440 let year_x = Expression::Function(Box::new(Function::new(
24441 "YEAR".to_string(),
24442 vec![expr.clone()],
24443 )));
24444 let quarter_x = Expression::Function(Box::new(Function::new(
24445 "QUARTER".to_string(),
24446 vec![expr.clone()],
24447 )));
24448 // QUARTER(x) * 3 - 2
24449 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
24450 left: quarter_x,
24451 right: Expression::number(3),
24452 left_comments: Vec::new(),
24453 operator_comments: Vec::new(),
24454 trailing_comments: Vec::new(),
24455 }));
24456 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
24457 left: mul,
24458 right: Expression::number(2),
24459 left_comments: Vec::new(),
24460 operator_comments: Vec::new(),
24461 trailing_comments: Vec::new(),
24462 }));
24463 let concat_args = vec![
24464 year_x,
24465 Expression::string(" "),
24466 sub,
24467 Expression::string(" 1"),
24468 ];
24469 let concat = Expression::Function(Box::new(Function::new(
24470 "CONCAT".to_string(),
24471 concat_args,
24472 )));
24473 Ok(Expression::Function(Box::new(Function::new(
24474 "STR_TO_DATE".to_string(),
24475 vec![concat, Expression::string("%Y %c %e")],
24476 ))))
24477 }
24478 "YEAR" => {
24479 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
24480 let year_x = Expression::Function(Box::new(Function::new(
24481 "YEAR".to_string(),
24482 vec![expr.clone()],
24483 )));
24484 let concat_args = vec![year_x, Expression::string(" 1 1")];
24485 let concat = Expression::Function(Box::new(Function::new(
24486 "CONCAT".to_string(),
24487 concat_args,
24488 )));
24489 Ok(Expression::Function(Box::new(Function::new(
24490 "STR_TO_DATE".to_string(),
24491 vec![concat, Expression::string("%Y %c %e")],
24492 ))))
24493 }
24494 _ => {
24495 // Unsupported unit -> keep as DATE_TRUNC
24496 Ok(Expression::Function(Box::new(Function::new(
24497 "DATE_TRUNC".to_string(),
24498 vec![Expression::string(unit), expr.clone()],
24499 ))))
24500 }
24501 }
24502 }
24503
24504 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
24505 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
24506 use crate::expressions::DataType;
24507 match dt {
24508 DataType::VarChar { .. } | DataType::Char { .. } => true,
24509 DataType::Struct { fields, .. } => fields
24510 .iter()
24511 .any(|f| Self::has_varchar_char_type(&f.data_type)),
24512 _ => false,
24513 }
24514 }
24515
24516 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
24517 fn normalize_varchar_to_string(
24518 dt: crate::expressions::DataType,
24519 ) -> crate::expressions::DataType {
24520 use crate::expressions::DataType;
24521 match dt {
24522 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
24523 name: "STRING".to_string(),
24524 },
24525 DataType::Struct { fields, nested } => {
24526 let fields = fields
24527 .into_iter()
24528 .map(|mut f| {
24529 f.data_type = Self::normalize_varchar_to_string(f.data_type);
24530 f
24531 })
24532 .collect();
24533 DataType::Struct { fields, nested }
24534 }
24535 other => other,
24536 }
24537 }
24538
24539 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
24540 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
24541 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
24542 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
24543 let trimmed = s.trim();
24544
24545 // Find where digits end and unit text begins
24546 let digit_end = trimmed
24547 .find(|c: char| !c.is_ascii_digit())
24548 .unwrap_or(trimmed.len());
24549 if digit_end == 0 || digit_end == trimmed.len() {
24550 return expr;
24551 }
24552 let num = &trimmed[..digit_end];
24553 let unit_text = trimmed[digit_end..].trim().to_uppercase();
24554 if unit_text.is_empty() {
24555 return expr;
24556 }
24557
24558 let known_units = [
24559 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
24560 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
24561 ];
24562 if !known_units.contains(&unit_text.as_str()) {
24563 return expr;
24564 }
24565
24566 let unit_str = unit_text.clone();
24567 // Singularize
24568 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
24569 &unit_str[..unit_str.len() - 1]
24570 } else {
24571 &unit_str
24572 };
24573 let unit = unit_singular;
24574
24575 match target {
24576 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24577 // INTERVAL '2' DAY
24578 let iu = match unit {
24579 "DAY" => crate::expressions::IntervalUnit::Day,
24580 "HOUR" => crate::expressions::IntervalUnit::Hour,
24581 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24582 "SECOND" => crate::expressions::IntervalUnit::Second,
24583 "WEEK" => crate::expressions::IntervalUnit::Week,
24584 "MONTH" => crate::expressions::IntervalUnit::Month,
24585 "YEAR" => crate::expressions::IntervalUnit::Year,
24586 _ => return expr,
24587 };
24588 return Expression::Interval(Box::new(crate::expressions::Interval {
24589 this: Some(Expression::string(num)),
24590 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24591 unit: iu,
24592 use_plural: false,
24593 }),
24594 }));
24595 }
24596 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
24597 // INTERVAL '2 DAYS'
24598 let plural = if num != "1" && !unit_str.ends_with('S') {
24599 format!("{} {}S", num, unit)
24600 } else if unit_str.ends_with('S') {
24601 format!("{} {}", num, unit_str)
24602 } else {
24603 format!("{} {}", num, unit)
24604 };
24605 return Expression::Interval(Box::new(crate::expressions::Interval {
24606 this: Some(Expression::string(&plural)),
24607 unit: None,
24608 }));
24609 }
24610 _ => {
24611 // Spark/Databricks/Hive: INTERVAL '1' DAY
24612 let iu = match unit {
24613 "DAY" => crate::expressions::IntervalUnit::Day,
24614 "HOUR" => crate::expressions::IntervalUnit::Hour,
24615 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24616 "SECOND" => crate::expressions::IntervalUnit::Second,
24617 "WEEK" => crate::expressions::IntervalUnit::Week,
24618 "MONTH" => crate::expressions::IntervalUnit::Month,
24619 "YEAR" => crate::expressions::IntervalUnit::Year,
24620 _ => return expr,
24621 };
24622 return Expression::Interval(Box::new(crate::expressions::Interval {
24623 this: Some(Expression::string(num)),
24624 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24625 unit: iu,
24626 use_plural: false,
24627 }),
24628 }));
24629 }
24630 }
24631 }
24632 // If it's already an INTERVAL expression, pass through
24633 expr
24634 }
24635
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `None` when the SELECT contains no UNNEST expressions, or when `target`
    /// is a dialect this expansion does not handle (only BigQuery, Presto/Trino, and
    /// Snowflake are supported below).
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Presto/Trino SEQUENCE() positions start at 1; BigQuery/Snowflake are 0-based.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells the conditional function IFF rather than IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Dialect-specific name of the array-length function.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Whether generated columns must be qualified with table aliases (_u, _u_2, ...).
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // Whether IF/IFF gets an explicit NULL third argument.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, qualified with `table` when one is given.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN with no condition.
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array argument of the UNNEST
            col_alias: String,               // generated column alias (col, col_2, ...)
            pos_alias: String,               // generated position alias (pos_2, pos_3, ...)
            source_alias: String,            // generated source alias (_u_2, _u_3, ...)
            original_expr: Expression,       // the full SELECT expression containing the UNNEST
            has_outer_alias: Option<String>, // explicit user alias on the expression, if any
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        let mut col_counter = 0usize;
        // pos/source counters start at 1 and are pre-incremented, so the first
        // generated aliases are pos_2 / _u_2 (matching the doc comment above).
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Pull the array argument out of an UNNEST, looking through aliases and
        // arithmetic operators (e.g. `UNNEST(arr) + 1`).
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // The alias name of a top-level `expr AS name`, if present.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First UNNEST gets plain `col`; later ones are numbered.
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        if unnest_infos.is_empty() {
            return None;
        }

        // The shared position series is aliased `pos` (table alias `_u` where needed).
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Keep the user's explicit alias if the original expression had one.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            // IF(pos = pos_N, col[, NULL]): yield the element only at its own position.
            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Splice the IF back into the original expression (preserving e.g. `+ 1`).
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The position series must span the longest of all unnested arrays.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based dialects iterate up to length - 1; 1-based up to length.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, <end>))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, <end>))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (<end>) + 1)))
                // ARRAY_GENERATE_RANGE's upper bound is exclusive, hence the + 1.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake FLATTEN exposes six fixed columns; `pos` takes the index slot.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause.
        // Each UNNEST contributes `pos = pos_N OR (pos > size AND pos_N = size)`,
        // aligning each array's positions with the shared series.
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // 0-based dialects compare against (size - 1), the last valid index.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            // The first pair is additionally parenthesized as a unit before further ANDs.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        if new_select.from.is_some() {
            // Existing FROM: append the series source plus the per-UNNEST joins.
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            // No FROM: the series source becomes the FROM clause itself.
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the alignment predicate onto any existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
25098
25099 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25100 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25101 match original {
25102 Expression::Unnest(_) => replacement.clone(),
25103 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25104 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25105 Expression::Add(op) => {
25106 let left = Self::replace_unnest_with_if(&op.left, replacement);
25107 let right = Self::replace_unnest_with_if(&op.right, replacement);
25108 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25109 }
25110 Expression::Sub(op) => {
25111 let left = Self::replace_unnest_with_if(&op.left, replacement);
25112 let right = Self::replace_unnest_with_if(&op.right, replacement);
25113 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25114 }
25115 Expression::Mul(op) => {
25116 let left = Self::replace_unnest_with_if(&op.left, replacement);
25117 let right = Self::replace_unnest_with_if(&op.right, replacement);
25118 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25119 }
25120 Expression::Div(op) => {
25121 let left = Self::replace_unnest_with_if(&op.left, replacement);
25122 let right = Self::replace_unnest_with_if(&op.right, replacement);
25123 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25124 }
25125 _ => original.clone(),
25126 }
25127 }
25128
25129 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
25130 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
25131 fn decompose_json_path(path: &str) -> Vec<String> {
25132 let mut parts = Vec::new();
25133 let path = if path.starts_with("$.") {
25134 &path[2..]
25135 } else if path.starts_with('$') {
25136 &path[1..]
25137 } else {
25138 path
25139 };
25140 if path.is_empty() {
25141 return parts;
25142 }
25143 let mut current = String::new();
25144 let chars: Vec<char> = path.chars().collect();
25145 let mut i = 0;
25146 while i < chars.len() {
25147 match chars[i] {
25148 '.' => {
25149 if !current.is_empty() {
25150 parts.push(current.clone());
25151 current.clear();
25152 }
25153 i += 1;
25154 }
25155 '[' => {
25156 if !current.is_empty() {
25157 parts.push(current.clone());
25158 current.clear();
25159 }
25160 i += 1;
25161 let mut bracket_content = String::new();
25162 while i < chars.len() && chars[i] != ']' {
25163 if chars[i] == '"' || chars[i] == '\'' {
25164 let quote = chars[i];
25165 i += 1;
25166 while i < chars.len() && chars[i] != quote {
25167 bracket_content.push(chars[i]);
25168 i += 1;
25169 }
25170 if i < chars.len() {
25171 i += 1;
25172 }
25173 } else {
25174 bracket_content.push(chars[i]);
25175 i += 1;
25176 }
25177 }
25178 if i < chars.len() {
25179 i += 1;
25180 }
25181 if bracket_content != "*" {
25182 parts.push(bracket_content);
25183 }
25184 }
25185 _ => {
25186 current.push(chars[i]);
25187 i += 1;
25188 }
25189 }
25190 }
25191 if !current.is_empty() {
25192 parts.push(current);
25193 }
25194 parts
25195 }
25196
25197 /// Strip `$` prefix from a JSON path, keeping the rest.
25198 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
25199 fn strip_json_dollar_prefix(path: &str) -> String {
25200 if path.starts_with("$.") {
25201 path[2..].to_string()
25202 } else if path.starts_with('$') {
25203 path[1..].to_string()
25204 } else {
25205 path.to_string()
25206 }
25207 }
25208
25209 /// Strip `[*]` wildcards from a JSON path.
25210 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
25211 fn strip_json_wildcards(path: &str) -> String {
25212 path.replace("[*]", "")
25213 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
25214 .trim_end_matches('.')
25215 .to_string()
25216 }
25217
25218 /// Convert bracket notation to dot notation for JSON paths.
25219 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
25220 fn bracket_to_dot_notation(path: &str) -> String {
25221 let mut result = String::new();
25222 let chars: Vec<char> = path.chars().collect();
25223 let mut i = 0;
25224 while i < chars.len() {
25225 if chars[i] == '[' {
25226 // Read bracket content
25227 i += 1;
25228 let mut bracket_content = String::new();
25229 let mut is_quoted = false;
25230 let mut _quote_char = '"';
25231 while i < chars.len() && chars[i] != ']' {
25232 if chars[i] == '"' || chars[i] == '\'' {
25233 is_quoted = true;
25234 _quote_char = chars[i];
25235 i += 1;
25236 while i < chars.len() && chars[i] != _quote_char {
25237 bracket_content.push(chars[i]);
25238 i += 1;
25239 }
25240 if i < chars.len() {
25241 i += 1;
25242 }
25243 } else {
25244 bracket_content.push(chars[i]);
25245 i += 1;
25246 }
25247 }
25248 if i < chars.len() {
25249 i += 1;
25250 } // skip ]
25251 if bracket_content == "*" {
25252 // Keep wildcard as-is
25253 result.push_str("[*]");
25254 } else if is_quoted {
25255 // Quoted bracket -> dot notation with quotes
25256 result.push('.');
25257 result.push('"');
25258 result.push_str(&bracket_content);
25259 result.push('"');
25260 } else {
25261 // Numeric index -> keep as bracket
25262 result.push('[');
25263 result.push_str(&bracket_content);
25264 result.push(']');
25265 }
25266 } else {
25267 result.push(chars[i]);
25268 i += 1;
25269 }
25270 }
25271 result
25272 }
25273
25274 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
25275 /// `$["a b"]` -> `$['a b']`
25276 fn bracket_to_single_quotes(path: &str) -> String {
25277 let mut result = String::new();
25278 let chars: Vec<char> = path.chars().collect();
25279 let mut i = 0;
25280 while i < chars.len() {
25281 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
25282 result.push('[');
25283 result.push('\'');
25284 i += 2; // skip [ and "
25285 while i < chars.len() && chars[i] != '"' {
25286 result.push(chars[i]);
25287 i += 1;
25288 }
25289 if i < chars.len() {
25290 i += 1;
25291 } // skip closing "
25292 result.push('\'');
25293 } else {
25294 result.push(chars[i]);
25295 i += 1;
25296 }
25297 }
25298 result
25299 }
25300
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Any expression that is not an INSERT or a SELECT-with-INTO, and any target
    /// dialect not handled below, is returned unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            // `#name` is TSQL's temp-table marker; only TSQL/Fabric keep it verbatim.
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may be a table reference or a bare identifier;
                // anything else yields an empty name (and is effectively ignored).
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // Either the `#` prefix or an explicit TEMPORARY flag marks a temp table.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewritten when the temp-ness came from the `#` prefix
                        // (an already-explicit TEMPORARY is left alone).
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
25397
25398 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
25399 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
25400 fn transform_create_table_properties(
25401 ct: &mut crate::expressions::CreateTable,
25402 _source: DialectType,
25403 target: DialectType,
25404 ) {
25405 use crate::expressions::{
25406 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
25407 Properties,
25408 };
25409
25410 // Helper to convert a raw property value string to the correct Expression
25411 let value_to_expr = |v: &str| -> Expression {
25412 let trimmed = v.trim();
25413 // Check if it's a quoted string (starts and ends with ')
25414 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
25415 Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
25416 }
25417 // Check if it's a number
25418 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
25419 Expression::Literal(Literal::Number(trimmed.to_string()))
25420 }
25421 // Check if it's ARRAY[...] or ARRAY(...)
25422 else if trimmed.to_uppercase().starts_with("ARRAY") {
25423 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
25424 let inner = trimmed
25425 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
25426 .trim_start_matches('[')
25427 .trim_start_matches('(')
25428 .trim_end_matches(']')
25429 .trim_end_matches(')');
25430 let elements: Vec<Expression> = inner
25431 .split(',')
25432 .map(|e| {
25433 let elem = e.trim().trim_matches('\'');
25434 Expression::Literal(Literal::String(elem.to_string()))
25435 })
25436 .collect();
25437 Expression::Function(Box::new(crate::expressions::Function::new(
25438 "ARRAY".to_string(),
25439 elements,
25440 )))
25441 }
25442 // Otherwise, just output as identifier (unquoted)
25443 else {
25444 Expression::Identifier(Identifier::new(trimmed.to_string()))
25445 }
25446 };
25447
25448 if ct.with_properties.is_empty() && ct.properties.is_empty() {
25449 return;
25450 }
25451
25452 // Handle Presto-style WITH properties
25453 if !ct.with_properties.is_empty() {
25454 // Extract FORMAT property and remaining properties
25455 let mut format_value: Option<String> = None;
25456 let mut partitioned_by: Option<String> = None;
25457 let mut other_props: Vec<(String, String)> = Vec::new();
25458
25459 for (key, value) in ct.with_properties.drain(..) {
25460 let key_upper = key.to_uppercase();
25461 if key_upper == "FORMAT" {
25462 // Strip surrounding quotes from value if present
25463 format_value = Some(value.trim_matches('\'').to_string());
25464 } else if key_upper == "PARTITIONED_BY" {
25465 partitioned_by = Some(value);
25466 } else {
25467 other_props.push((key, value));
25468 }
25469 }
25470
25471 match target {
25472 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25473 // Presto: keep WITH properties but lowercase 'format' key
25474 if let Some(fmt) = format_value {
25475 ct.with_properties
25476 .push(("format".to_string(), format!("'{}'", fmt)));
25477 }
25478 if let Some(part) = partitioned_by {
25479 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
25480 let trimmed = part.trim();
25481 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
25482 // Also handle ARRAY['...'] format - keep as-is
25483 if trimmed.to_uppercase().starts_with("ARRAY") {
25484 ct.with_properties
25485 .push(("PARTITIONED_BY".to_string(), part));
25486 } else {
25487 // Parse column names from the parenthesized list
25488 let cols: Vec<&str> = inner
25489 .split(',')
25490 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
25491 .collect();
25492 let array_val = format!(
25493 "ARRAY[{}]",
25494 cols.iter()
25495 .map(|c| format!("'{}'", c))
25496 .collect::<Vec<_>>()
25497 .join(", ")
25498 );
25499 ct.with_properties
25500 .push(("PARTITIONED_BY".to_string(), array_val));
25501 }
25502 }
25503 ct.with_properties.extend(other_props);
25504 }
25505 DialectType::Hive => {
25506 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
25507 if let Some(fmt) = format_value {
25508 ct.properties.push(Expression::FileFormatProperty(Box::new(
25509 FileFormatProperty {
25510 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
25511 expressions: vec![],
25512 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
25513 value: true,
25514 }))),
25515 },
25516 )));
25517 }
25518 if let Some(_part) = partitioned_by {
25519 // PARTITIONED_BY handling is complex - move columns to partitioned by
25520 // For now, the partition columns are extracted from the column list
25521 Self::apply_partitioned_by(ct, &_part, target);
25522 }
25523 if !other_props.is_empty() {
25524 let eq_exprs: Vec<Expression> = other_props
25525 .into_iter()
25526 .map(|(k, v)| {
25527 Expression::Eq(Box::new(BinaryOp::new(
25528 Expression::Literal(Literal::String(k)),
25529 value_to_expr(&v),
25530 )))
25531 })
25532 .collect();
25533 ct.properties
25534 .push(Expression::Properties(Box::new(Properties {
25535 expressions: eq_exprs,
25536 })));
25537 }
25538 }
25539 DialectType::Spark | DialectType::Databricks => {
25540 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
25541 if let Some(fmt) = format_value {
25542 ct.properties.push(Expression::FileFormatProperty(Box::new(
25543 FileFormatProperty {
25544 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
25545 expressions: vec![],
25546 hive_format: None, // None means USING syntax
25547 },
25548 )));
25549 }
25550 if let Some(_part) = partitioned_by {
25551 Self::apply_partitioned_by(ct, &_part, target);
25552 }
25553 if !other_props.is_empty() {
25554 let eq_exprs: Vec<Expression> = other_props
25555 .into_iter()
25556 .map(|(k, v)| {
25557 Expression::Eq(Box::new(BinaryOp::new(
25558 Expression::Literal(Literal::String(k)),
25559 value_to_expr(&v),
25560 )))
25561 })
25562 .collect();
25563 ct.properties
25564 .push(Expression::Properties(Box::new(Properties {
25565 expressions: eq_exprs,
25566 })));
25567 }
25568 }
25569 DialectType::DuckDB => {
25570 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
25571 // Keep nothing
25572 }
25573 _ => {
25574 // For other dialects, keep WITH properties as-is
25575 if let Some(fmt) = format_value {
25576 ct.with_properties
25577 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
25578 }
25579 if let Some(part) = partitioned_by {
25580 ct.with_properties
25581 .push(("PARTITIONED_BY".to_string(), part));
25582 }
25583 ct.with_properties.extend(other_props);
25584 }
25585 }
25586 }
25587
25588 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
25589 // and Hive STORED AS -> Presto WITH (format=...) conversion
25590 if !ct.properties.is_empty() {
25591 let is_presto_target = matches!(
25592 target,
25593 DialectType::Presto | DialectType::Trino | DialectType::Athena
25594 );
25595 let is_duckdb_target = matches!(target, DialectType::DuckDB);
25596
25597 if is_presto_target || is_duckdb_target {
25598 let mut new_properties = Vec::new();
25599 for prop in ct.properties.drain(..) {
25600 match &prop {
25601 Expression::FileFormatProperty(ffp) => {
25602 if is_presto_target {
25603 // Convert STORED AS/USING to WITH (format=...)
25604 if let Some(ref fmt_expr) = ffp.this {
25605 let fmt_str = match fmt_expr.as_ref() {
25606 Expression::Identifier(id) => id.name.clone(),
25607 Expression::Literal(Literal::String(s)) => s.clone(),
25608 _ => {
25609 new_properties.push(prop);
25610 continue;
25611 }
25612 };
25613 ct.with_properties
25614 .push(("format".to_string(), format!("'{}'", fmt_str)));
25615 }
25616 }
25617 // DuckDB: just strip file format properties
25618 }
25619 // Convert TBLPROPERTIES to WITH properties for Presto target
25620 Expression::Properties(props) if is_presto_target => {
25621 for expr in &props.expressions {
25622 if let Expression::Eq(eq) = expr {
25623 // Extract key and value from the Eq expression
25624 let key = match &eq.left {
25625 Expression::Literal(Literal::String(s)) => s.clone(),
25626 Expression::Identifier(id) => id.name.clone(),
25627 _ => continue,
25628 };
25629 let value = match &eq.right {
25630 Expression::Literal(Literal::String(s)) => {
25631 format!("'{}'", s)
25632 }
25633 Expression::Literal(Literal::Number(n)) => n.clone(),
25634 Expression::Identifier(id) => id.name.clone(),
25635 _ => continue,
25636 };
25637 ct.with_properties.push((key, value));
25638 }
25639 }
25640 }
25641 // Convert PartitionedByProperty for Presto target
25642 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
25643 // Check if it contains ColumnDef expressions (Hive-style with types)
25644 if let Expression::Tuple(ref tuple) = *pbp.this {
25645 let mut col_names: Vec<String> = Vec::new();
25646 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
25647 let mut has_col_defs = false;
25648 for expr in &tuple.expressions {
25649 if let Expression::ColumnDef(ref cd) = expr {
25650 has_col_defs = true;
25651 col_names.push(cd.name.name.clone());
25652 col_defs.push(*cd.clone());
25653 } else if let Expression::Column(ref col) = expr {
25654 col_names.push(col.name.name.clone());
25655 } else if let Expression::Identifier(ref id) = expr {
25656 col_names.push(id.name.clone());
25657 } else {
25658 // For function expressions like MONTHS(y), serialize to SQL
25659 let generic = Dialect::get(DialectType::Generic);
25660 if let Ok(sql) = generic.generate(expr) {
25661 col_names.push(sql);
25662 }
25663 }
25664 }
25665 if has_col_defs {
25666 // Merge partition column defs into the main column list
25667 for cd in col_defs {
25668 ct.columns.push(cd);
25669 }
25670 }
25671 if !col_names.is_empty() {
25672 // Add PARTITIONED_BY property
25673 let array_val = format!(
25674 "ARRAY[{}]",
25675 col_names
25676 .iter()
25677 .map(|n| format!("'{}'", n))
25678 .collect::<Vec<_>>()
25679 .join(", ")
25680 );
25681 ct.with_properties
25682 .push(("PARTITIONED_BY".to_string(), array_val));
25683 }
25684 }
25685 // Skip - don't keep in properties
25686 }
25687 _ => {
25688 if !is_duckdb_target {
25689 new_properties.push(prop);
25690 }
25691 }
25692 }
25693 }
25694 ct.properties = new_properties;
25695 } else {
25696 // For Hive/Spark targets, unquote format names in STORED AS
25697 for prop in &mut ct.properties {
25698 if let Expression::FileFormatProperty(ref mut ffp) = prop {
25699 if let Some(ref mut fmt_expr) = ffp.this {
25700 if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
25701 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
25702 let unquoted = s.clone();
25703 *fmt_expr =
25704 Box::new(Expression::Identifier(Identifier::new(unquoted)));
25705 }
25706 }
25707 }
25708 }
25709 }
25710 }
25711 }
25712
25713 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
25714 fn apply_partitioned_by(
25715 ct: &mut crate::expressions::CreateTable,
25716 partitioned_by_value: &str,
25717 target: DialectType,
25718 ) {
25719 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
25720
25721 // Parse the ARRAY['col1', 'col2'] value to extract column names
25722 let mut col_names: Vec<String> = Vec::new();
25723 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
25724 let inner = partitioned_by_value
25725 .trim()
25726 .trim_start_matches("ARRAY")
25727 .trim_start_matches('[')
25728 .trim_start_matches('(')
25729 .trim_end_matches(']')
25730 .trim_end_matches(')');
25731 for part in inner.split(',') {
25732 let col = part.trim().trim_matches('\'').trim_matches('"');
25733 if !col.is_empty() {
25734 col_names.push(col.to_string());
25735 }
25736 }
25737
25738 if col_names.is_empty() {
25739 return;
25740 }
25741
25742 if matches!(target, DialectType::Hive) {
25743 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
25744 let mut partition_col_defs = Vec::new();
25745 for col_name in &col_names {
25746 // Find and remove from columns
25747 if let Some(pos) = ct
25748 .columns
25749 .iter()
25750 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
25751 {
25752 let col_def = ct.columns.remove(pos);
25753 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
25754 }
25755 }
25756 if !partition_col_defs.is_empty() {
25757 ct.properties
25758 .push(Expression::PartitionedByProperty(Box::new(
25759 PartitionedByProperty {
25760 this: Box::new(Expression::Tuple(Box::new(Tuple {
25761 expressions: partition_col_defs,
25762 }))),
25763 },
25764 )));
25765 }
25766 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
25767 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
25768 // Use quoted identifiers to match the quoting style of the original column definitions
25769 let partition_exprs: Vec<Expression> = col_names
25770 .iter()
25771 .map(|name| {
25772 // Check if the column exists in the column list and use its quoting
25773 let is_quoted = ct
25774 .columns
25775 .iter()
25776 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
25777 let ident = if is_quoted {
25778 Identifier::quoted(name.clone())
25779 } else {
25780 Identifier::new(name.clone())
25781 };
25782 Expression::Column(Column {
25783 name: ident,
25784 table: None,
25785 join_mark: false,
25786 trailing_comments: Vec::new(),
25787 })
25788 })
25789 .collect();
25790 ct.properties
25791 .push(Expression::PartitionedByProperty(Box::new(
25792 PartitionedByProperty {
25793 this: Box::new(Expression::Tuple(Box::new(Tuple {
25794 expressions: partition_exprs,
25795 }))),
25796 },
25797 )));
25798 }
25799 // DuckDB: strip partitioned_by entirely (already handled)
25800 }
25801
25802 /// Convert a DataType to Spark's type string format (using angle brackets)
25803 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
25804 use crate::expressions::DataType;
25805 match dt {
25806 DataType::Int { .. } => "INT".to_string(),
25807 DataType::BigInt { .. } => "BIGINT".to_string(),
25808 DataType::SmallInt { .. } => "SMALLINT".to_string(),
25809 DataType::TinyInt { .. } => "TINYINT".to_string(),
25810 DataType::Float { .. } => "FLOAT".to_string(),
25811 DataType::Double { .. } => "DOUBLE".to_string(),
25812 DataType::Decimal {
25813 precision: Some(p),
25814 scale: Some(s),
25815 } => format!("DECIMAL({}, {})", p, s),
25816 DataType::Decimal {
25817 precision: Some(p), ..
25818 } => format!("DECIMAL({})", p),
25819 DataType::Decimal { .. } => "DECIMAL".to_string(),
25820 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
25821 "STRING".to_string()
25822 }
25823 DataType::Char { .. } => "STRING".to_string(),
25824 DataType::Boolean => "BOOLEAN".to_string(),
25825 DataType::Date => "DATE".to_string(),
25826 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
25827 DataType::Json | DataType::JsonB => "STRING".to_string(),
25828 DataType::Binary { .. } => "BINARY".to_string(),
25829 DataType::Array { element_type, .. } => {
25830 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
25831 }
25832 DataType::Map {
25833 key_type,
25834 value_type,
25835 } => format!(
25836 "MAP<{}, {}>",
25837 Self::data_type_to_spark_string(key_type),
25838 Self::data_type_to_spark_string(value_type)
25839 ),
25840 DataType::Struct { fields, .. } => {
25841 let field_strs: Vec<String> = fields
25842 .iter()
25843 .map(|f| {
25844 if f.name.is_empty() {
25845 Self::data_type_to_spark_string(&f.data_type)
25846 } else {
25847 format!(
25848 "{}: {}",
25849 f.name,
25850 Self::data_type_to_spark_string(&f.data_type)
25851 )
25852 }
25853 })
25854 .collect();
25855 format!("STRUCT<{}>", field_strs.join(", "))
25856 }
25857 DataType::Custom { name } => name.clone(),
25858 _ => format!("{:?}", dt),
25859 }
25860 }
25861
25862 /// Extract value and unit from an Interval expression
25863 /// Returns (value_expression, IntervalUnit)
25864 fn extract_interval_parts(
25865 interval_expr: &Expression,
25866 ) -> (Expression, crate::expressions::IntervalUnit) {
25867 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
25868
25869 if let Expression::Interval(iv) = interval_expr {
25870 let val = iv.this.clone().unwrap_or(Expression::number(0));
25871 let unit = match &iv.unit {
25872 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
25873 None => {
25874 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
25875 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
25876 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
25877 if parts.len() == 2 {
25878 let unit_str = parts[1].trim().to_uppercase();
25879 let parsed_unit = match unit_str.as_str() {
25880 "YEAR" | "YEARS" => IntervalUnit::Year,
25881 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
25882 "MONTH" | "MONTHS" => IntervalUnit::Month,
25883 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
25884 "DAY" | "DAYS" => IntervalUnit::Day,
25885 "HOUR" | "HOURS" => IntervalUnit::Hour,
25886 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
25887 "SECOND" | "SECONDS" => IntervalUnit::Second,
25888 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
25889 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
25890 _ => IntervalUnit::Day,
25891 };
25892 // Return just the numeric part as value and parsed unit
25893 return (
25894 Expression::Literal(crate::expressions::Literal::String(
25895 parts[0].to_string(),
25896 )),
25897 parsed_unit,
25898 );
25899 }
25900 IntervalUnit::Day
25901 } else {
25902 IntervalUnit::Day
25903 }
25904 }
25905 _ => IntervalUnit::Day,
25906 };
25907 (val, unit)
25908 } else {
25909 // Not an interval - pass through
25910 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
25911 }
25912 }
25913
25914 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
25915 fn normalize_bigquery_function(
25916 e: Expression,
25917 source: DialectType,
25918 target: DialectType,
25919 ) -> Result<Expression> {
25920 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
25921
25922 let f = if let Expression::Function(f) = e {
25923 *f
25924 } else {
25925 return Ok(e);
25926 };
25927 let name = f.name.to_uppercase();
25928 let mut args = f.args;
25929
25930 /// Helper to extract unit string from an identifier, column, or literal expression
25931 fn get_unit_str(expr: &Expression) -> String {
25932 match expr {
25933 Expression::Identifier(id) => id.name.to_uppercase(),
25934 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
25935 Expression::Column(col) => col.name.name.to_uppercase(),
25936 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
25937 Expression::Function(f) => {
25938 let base = f.name.to_uppercase();
25939 if !f.args.is_empty() {
25940 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
25941 let inner = get_unit_str(&f.args[0]);
25942 format!("{}({})", base, inner)
25943 } else {
25944 base
25945 }
25946 }
25947 _ => "DAY".to_string(),
25948 }
25949 }
25950
25951 /// Parse unit string to IntervalUnit
25952 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
25953 match s {
25954 "YEAR" => crate::expressions::IntervalUnit::Year,
25955 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
25956 "MONTH" => crate::expressions::IntervalUnit::Month,
25957 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
25958 "DAY" => crate::expressions::IntervalUnit::Day,
25959 "HOUR" => crate::expressions::IntervalUnit::Hour,
25960 "MINUTE" => crate::expressions::IntervalUnit::Minute,
25961 "SECOND" => crate::expressions::IntervalUnit::Second,
25962 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
25963 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
25964 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
25965 _ => crate::expressions::IntervalUnit::Day,
25966 }
25967 }
25968
25969 match name.as_str() {
25970 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
25971 // (BigQuery: result = date1 - date2, Standard: result = end - start)
25972 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
25973 let date1 = args.remove(0);
25974 let date2 = args.remove(0);
25975 let unit_expr = args.remove(0);
25976 let unit_str = get_unit_str(&unit_expr);
25977
25978 if matches!(target, DialectType::BigQuery) {
25979 // BigQuery -> BigQuery: just uppercase the unit
25980 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
25981 return Ok(Expression::Function(Box::new(Function::new(
25982 f.name,
25983 vec![date1, date2, unit],
25984 ))));
25985 }
25986
25987 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
25988 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
25989 if matches!(target, DialectType::Snowflake) {
25990 return Ok(Expression::TimestampDiff(Box::new(
25991 crate::expressions::TimestampDiff {
25992 this: Box::new(date2),
25993 expression: Box::new(date1),
25994 unit: Some(unit_str),
25995 },
25996 )));
25997 }
25998
25999 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26000 if matches!(target, DialectType::DuckDB) {
26001 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26002 // CAST to TIME
26003 let cast_fn = |e: Expression| -> Expression {
26004 match e {
26005 Expression::Literal(Literal::String(s)) => {
26006 Expression::Cast(Box::new(Cast {
26007 this: Expression::Literal(Literal::String(s)),
26008 to: DataType::Custom {
26009 name: "TIME".to_string(),
26010 },
26011 trailing_comments: vec![],
26012 double_colon_syntax: false,
26013 format: None,
26014 default: None,
26015 }))
26016 }
26017 other => other,
26018 }
26019 };
26020 (cast_fn(date1), cast_fn(date2))
26021 } else if name == "DATETIME_DIFF" {
26022 // CAST to TIMESTAMP
26023 (
26024 Self::ensure_cast_timestamp(date1),
26025 Self::ensure_cast_timestamp(date2),
26026 )
26027 } else {
26028 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26029 (
26030 Self::ensure_cast_timestamptz(date1),
26031 Self::ensure_cast_timestamptz(date2),
26032 )
26033 };
26034 return Ok(Expression::Function(Box::new(Function::new(
26035 "DATE_DIFF".to_string(),
26036 vec![
26037 Expression::Literal(Literal::String(unit_str)),
26038 cast_d2,
26039 cast_d1,
26040 ],
26041 ))));
26042 }
26043
26044 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26045 let unit = Expression::Identifier(Identifier::new(unit_str));
26046 Ok(Expression::Function(Box::new(Function::new(
26047 "TIMESTAMPDIFF".to_string(),
26048 vec![unit, date2, date1],
26049 ))))
26050 }
26051
26052 // DATEDIFF(unit, start, end) -> target-specific form
26053 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26054 "DATEDIFF" if args.len() == 3 => {
26055 let arg0 = args.remove(0);
26056 let arg1 = args.remove(0);
26057 let arg2 = args.remove(0);
26058 let unit_str = get_unit_str(&arg0);
26059
26060 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26061 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26062 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26063
26064 if matches!(target, DialectType::Snowflake) {
26065 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26066 let unit = Expression::Identifier(Identifier::new(unit_str));
26067 return Ok(Expression::Function(Box::new(Function::new(
26068 "DATEDIFF".to_string(),
26069 vec![unit, arg1, arg2],
26070 ))));
26071 }
26072
26073 if matches!(target, DialectType::DuckDB) {
26074 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26075 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26076 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26077 return Ok(Expression::Function(Box::new(Function::new(
26078 "DATE_DIFF".to_string(),
26079 vec![
26080 Expression::Literal(Literal::String(unit_str)),
26081 cast_d1,
26082 cast_d2,
26083 ],
26084 ))));
26085 }
26086
26087 if matches!(target, DialectType::BigQuery) {
26088 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26089 let cast_d1 = Self::ensure_cast_datetime(arg1);
26090 let cast_d2 = Self::ensure_cast_datetime(arg2);
26091 let unit = Expression::Identifier(Identifier::new(unit_str));
26092 return Ok(Expression::Function(Box::new(Function::new(
26093 "DATE_DIFF".to_string(),
26094 vec![cast_d2, cast_d1, unit],
26095 ))));
26096 }
26097
26098 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26099 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26100 let unit = Expression::Identifier(Identifier::new(unit_str));
26101 return Ok(Expression::Function(Box::new(Function::new(
26102 "DATEDIFF".to_string(),
26103 vec![unit, arg1, arg2],
26104 ))));
26105 }
26106
26107 if matches!(target, DialectType::Hive) {
26108 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26109 match unit_str.as_str() {
26110 "MONTH" => {
26111 return Ok(Expression::Function(Box::new(Function::new(
26112 "CAST".to_string(),
26113 vec![Expression::Function(Box::new(Function::new(
26114 "MONTHS_BETWEEN".to_string(),
26115 vec![arg2, arg1],
26116 )))],
26117 ))));
26118 }
26119 "WEEK" => {
26120 return Ok(Expression::Cast(Box::new(Cast {
26121 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26122 Expression::Function(Box::new(Function::new(
26123 "DATEDIFF".to_string(),
26124 vec![arg2, arg1],
26125 ))),
26126 Expression::Literal(Literal::Number("7".to_string())),
26127 ))),
26128 to: DataType::Int {
26129 length: None,
26130 integer_spelling: false,
26131 },
26132 trailing_comments: vec![],
26133 double_colon_syntax: false,
26134 format: None,
26135 default: None,
26136 })));
26137 }
26138 _ => {
26139 // Default: DATEDIFF(end, start) for DAY
26140 return Ok(Expression::Function(Box::new(Function::new(
26141 "DATEDIFF".to_string(),
26142 vec![arg2, arg1],
26143 ))));
26144 }
26145 }
26146 }
26147
26148 if matches!(
26149 target,
26150 DialectType::Presto | DialectType::Trino | DialectType::Athena
26151 ) {
26152 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26153 return Ok(Expression::Function(Box::new(Function::new(
26154 "DATE_DIFF".to_string(),
26155 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26156 ))));
26157 }
26158
26159 if matches!(target, DialectType::TSQL) {
26160 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26161 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26162 let unit = Expression::Identifier(Identifier::new(unit_str));
26163 return Ok(Expression::Function(Box::new(Function::new(
26164 "DATEDIFF".to_string(),
26165 vec![unit, arg1, cast_d2],
26166 ))));
26167 }
26168
26169 if matches!(target, DialectType::PostgreSQL) {
26170 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26171 // For now, use DATEDIFF (passthrough) with uppercased unit
26172 let unit = Expression::Identifier(Identifier::new(unit_str));
26173 return Ok(Expression::Function(Box::new(Function::new(
26174 "DATEDIFF".to_string(),
26175 vec![unit, arg1, arg2],
26176 ))));
26177 }
26178
26179 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26180 let unit = Expression::Identifier(Identifier::new(unit_str));
26181 Ok(Expression::Function(Box::new(Function::new(
26182 "DATEDIFF".to_string(),
26183 vec![unit, arg1, arg2],
26184 ))))
26185 }
26186
26187 // DATE_DIFF(date1, date2, unit) -> standard form
26188 "DATE_DIFF" if args.len() == 3 => {
26189 let date1 = args.remove(0);
26190 let date2 = args.remove(0);
26191 let unit_expr = args.remove(0);
26192 let unit_str = get_unit_str(&unit_expr);
26193
26194 if matches!(target, DialectType::BigQuery) {
26195 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
26196 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
26197 "WEEK".to_string()
26198 } else {
26199 unit_str
26200 };
26201 let norm_d1 = Self::date_literal_to_cast(date1);
26202 let norm_d2 = Self::date_literal_to_cast(date2);
26203 let unit = Expression::Identifier(Identifier::new(norm_unit));
26204 return Ok(Expression::Function(Box::new(Function::new(
26205 f.name,
26206 vec![norm_d1, norm_d2, unit],
26207 ))));
26208 }
26209
26210 if matches!(target, DialectType::MySQL) {
26211 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
26212 let norm_d1 = Self::date_literal_to_cast(date1);
26213 let norm_d2 = Self::date_literal_to_cast(date2);
26214 return Ok(Expression::Function(Box::new(Function::new(
26215 "DATEDIFF".to_string(),
26216 vec![norm_d1, norm_d2],
26217 ))));
26218 }
26219
26220 if matches!(target, DialectType::StarRocks) {
26221 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
26222 let norm_d1 = Self::date_literal_to_cast(date1);
26223 let norm_d2 = Self::date_literal_to_cast(date2);
26224 return Ok(Expression::Function(Box::new(Function::new(
26225 "DATE_DIFF".to_string(),
26226 vec![
26227 Expression::Literal(Literal::String(unit_str)),
26228 norm_d1,
26229 norm_d2,
26230 ],
26231 ))));
26232 }
26233
26234 if matches!(target, DialectType::DuckDB) {
26235 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
26236 let norm_d1 = Self::ensure_cast_date(date1);
26237 let norm_d2 = Self::ensure_cast_date(date2);
26238
26239 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
26240 let is_week_variant = unit_str == "WEEK"
26241 || unit_str.starts_with("WEEK(")
26242 || unit_str == "ISOWEEK";
26243 if is_week_variant {
26244 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
26245 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
26246 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
26247 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
26248 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
26249 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
26250 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
26251 Some("1") // Shift Sunday to Monday alignment
26252 } else if unit_str == "WEEK(SATURDAY)" {
26253 Some("-5")
26254 } else if unit_str == "WEEK(TUESDAY)" {
26255 Some("-1")
26256 } else if unit_str == "WEEK(WEDNESDAY)" {
26257 Some("-2")
26258 } else if unit_str == "WEEK(THURSDAY)" {
26259 Some("-3")
26260 } else if unit_str == "WEEK(FRIDAY)" {
26261 Some("-4")
26262 } else {
26263 Some("1") // default to Sunday
26264 };
26265
26266 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
26267 let shifted = if let Some(off) = offset {
26268 let interval =
26269 Expression::Interval(Box::new(crate::expressions::Interval {
26270 this: Some(Expression::Literal(Literal::String(
26271 off.to_string(),
26272 ))),
26273 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26274 unit: crate::expressions::IntervalUnit::Day,
26275 use_plural: false,
26276 }),
26277 }));
26278 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26279 date, interval,
26280 )))
26281 } else {
26282 date
26283 };
26284 Expression::Function(Box::new(Function::new(
26285 "DATE_TRUNC".to_string(),
26286 vec![
26287 Expression::Literal(Literal::String("WEEK".to_string())),
26288 shifted,
26289 ],
26290 )))
26291 };
26292
26293 let trunc_d2 = make_trunc(norm_d2, day_offset);
26294 let trunc_d1 = make_trunc(norm_d1, day_offset);
26295 return Ok(Expression::Function(Box::new(Function::new(
26296 "DATE_DIFF".to_string(),
26297 vec![
26298 Expression::Literal(Literal::String("WEEK".to_string())),
26299 trunc_d2,
26300 trunc_d1,
26301 ],
26302 ))));
26303 }
26304
26305 return Ok(Expression::Function(Box::new(Function::new(
26306 "DATE_DIFF".to_string(),
26307 vec![
26308 Expression::Literal(Literal::String(unit_str)),
26309 norm_d2,
26310 norm_d1,
26311 ],
26312 ))));
26313 }
26314
26315 // Default: DATEDIFF(unit, date2, date1)
26316 let unit = Expression::Identifier(Identifier::new(unit_str));
26317 Ok(Expression::Function(Box::new(Function::new(
26318 "DATEDIFF".to_string(),
26319 vec![unit, date2, date1],
26320 ))))
26321 }
26322
26323 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
26324 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
26325 let ts = args.remove(0);
26326 let interval_expr = args.remove(0);
26327 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26328
26329 match target {
26330 DialectType::Snowflake => {
26331 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
26332 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
26333 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
26334 let unit_str = Self::interval_unit_to_string(&unit);
26335 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
26336 Ok(Expression::TimestampAdd(Box::new(
26337 crate::expressions::TimestampAdd {
26338 this: Box::new(val),
26339 expression: Box::new(cast_ts),
26340 unit: Some(unit_str),
26341 },
26342 )))
26343 }
26344 DialectType::Spark | DialectType::Databricks => {
26345 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
26346 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
26347 let interval =
26348 Expression::Interval(Box::new(crate::expressions::Interval {
26349 this: Some(val),
26350 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26351 unit,
26352 use_plural: false,
26353 }),
26354 }));
26355 Ok(Expression::Add(Box::new(
26356 crate::expressions::BinaryOp::new(ts, interval),
26357 )))
26358 } else if name == "DATETIME_ADD"
26359 && matches!(target, DialectType::Databricks)
26360 {
26361 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
26362 let unit_str = Self::interval_unit_to_string(&unit);
26363 Ok(Expression::Function(Box::new(Function::new(
26364 "TIMESTAMPADD".to_string(),
26365 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
26366 ))))
26367 } else {
26368 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
26369 let unit_str = Self::interval_unit_to_string(&unit);
26370 let cast_ts =
26371 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
26372 Self::maybe_cast_ts(ts)
26373 } else {
26374 ts
26375 };
26376 Ok(Expression::Function(Box::new(Function::new(
26377 "DATE_ADD".to_string(),
26378 vec![
26379 Expression::Identifier(Identifier::new(unit_str)),
26380 val,
26381 cast_ts,
26382 ],
26383 ))))
26384 }
26385 }
26386 DialectType::MySQL => {
26387 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
26388 let mysql_ts = if name.starts_with("TIMESTAMP") {
26389 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
26390 match &ts {
26391 Expression::Function(ref inner_f)
26392 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
26393 {
26394 // Already wrapped, keep as-is
26395 ts
26396 }
26397 _ => {
26398 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
26399 let unwrapped = match ts {
26400 Expression::Literal(Literal::Timestamp(s)) => {
26401 Expression::Literal(Literal::String(s))
26402 }
26403 other => other,
26404 };
26405 Expression::Function(Box::new(Function::new(
26406 "TIMESTAMP".to_string(),
26407 vec![unwrapped],
26408 )))
26409 }
26410 }
26411 } else {
26412 ts
26413 };
26414 Ok(Expression::DateAdd(Box::new(
26415 crate::expressions::DateAddFunc {
26416 this: mysql_ts,
26417 interval: val,
26418 unit,
26419 },
26420 )))
26421 }
26422 _ => {
26423 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
26424 let cast_ts = if matches!(target, DialectType::DuckDB) {
26425 if name == "DATETIME_ADD" {
26426 Self::ensure_cast_timestamp(ts)
26427 } else if name.starts_with("TIMESTAMP") {
26428 Self::maybe_cast_ts_to_tz(ts, &name)
26429 } else {
26430 ts
26431 }
26432 } else {
26433 ts
26434 };
26435 Ok(Expression::DateAdd(Box::new(
26436 crate::expressions::DateAddFunc {
26437 this: cast_ts,
26438 interval: val,
26439 unit,
26440 },
26441 )))
26442 }
26443 }
26444 }
26445
26446 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
            // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
            //
            // Handles the BigQuery-style *_SUB family. The second argument is an
            // INTERVAL expression which is decomposed into a (value, unit) pair,
            // then the subtraction is re-expressed in the target's native form.
            "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        // Subtraction is expressed as addition of `val * -1` so that
                        // composite value expressions stay well-formed.
                        let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            val,
                            Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                this: Expression::number(1),
                            })),
                        )));
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(neg_val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // NOTE(review): TIME_SUB targeting Spark falls into the
                        // Databricks TIMESTAMPADD branch below — confirm intentional.
                        if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                            || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
                        {
                            // Spark: ts - INTERVAL val UNIT
                            let cast_ts = if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts(ts)
                            } else {
                                ts
                            };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Sub(Box::new(
                                crate::expressions::BinaryOp::new(cast_ts, interval),
                            )))
                        } else {
                            // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let neg_val =
                                Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                    val,
                                    Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                    })),
                                )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    neg_val,
                                    ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // MySQL: DATE_SUB(TIMESTAMP(ts), INTERVAL val UNIT)
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' so the
                                    // TIMESTAMP() wrapper does not double up.
                                    let unwrapped = match ts {
                                        Expression::Literal(Literal::Timestamp(s)) => {
                                            Expression::Literal(Literal::String(s))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // All other targets: generic DateSub node. DuckDB additionally
                        // casts the timestamp operand so its `- INTERVAL` form type-checks.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_SUB" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
26568
            // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
            //
            // BigQuery-style date subtraction. Decomposes the INTERVAL argument and
            // rebuilds the subtraction per target dialect.
            "DATE_SUB" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_ADD(date, -val)
                        // Use DateAdd expression with negative val so it generates correctly
                        // The generator will output DATE_ADD(date, INTERVAL -val DAY)
                        // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
                        // Instead, we directly output as a simple negated DateSub
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
                        // Just ensure the date is cast properly
                        let cast_date = Self::ensure_cast_date(date);
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date - INTERVAL 'val UNIT' — value and unit are
                        // folded into a single quoted interval literal.
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    // Default: keep the generic DateSub node for the generator to render.
                    _ => Ok(Expression::DateSub(Box::new(
                        crate::expressions::DateAddFunc {
                            this: date,
                            interval: val,
                            unit,
                        },
                    ))),
                }
            }
26642
            // DATEADD(unit, val, date) -> target-specific form
            // Used by: Redshift, Snowflake, TSQL, ClickHouse
            "DATEADD" if args.len() == 3 => {
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = get_unit_str(&arg0);

                if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
                    // Keep DATEADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
                    let date = if matches!(target, DialectType::TSQL)
                        && !matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        ) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL 'val' UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    let cast_date = Self::ensure_cast_timestamp(arg2);
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(cast_date, interval),
                    )));
                }

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![arg2, interval],
                    ))));
                }

                if matches!(target, DialectType::Databricks) {
                    // Databricks: keep DATEADD(UNIT, val, date) format
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::Spark) {
                    // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
                    // Folds `literal * factor` at transpile time when the count is a
                    // plain integer literal; otherwise emits a `val * factor` node.
                    fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
                        if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                            if let Ok(val) = n.parse::<i64>() {
                                return Expression::Literal(crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ));
                            }
                        }
                        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(crate::expressions::Literal::Number(
                                factor.to_string(),
                            )),
                        )))
                    }
                    match unit_str.as_str() {
                        "YEAR" => {
                            // 1 year = 12 months
                            let months = multiply_expr_dateadd(arg1, 12);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "QUARTER" => {
                            // 1 quarter = 3 months
                            let months = multiply_expr_dateadd(arg1, 3);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "WEEK" => {
                            // 1 week = 7 days
                            let days = multiply_expr_dateadd(arg1, 7);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ))));
                        }
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            // NOTE(review): falls back to a 3-arg DATE_ADD(UNIT, val, date);
                            // confirm Spark accepts this shape for the remaining units.
                            let unit = Expression::Identifier(Identifier::new(unit_str));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))));
                        }
                    }
                }

                if matches!(target, DialectType::Hive) {
                    // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
                    match unit_str.as_str() {
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            let iu = parse_interval_unit(&unit_str);
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(arg1),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    }),
                                }));
                            return Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(arg2, interval),
                            )));
                        }
                    }
                }

                if matches!(target, DialectType::PostgreSQL) {
                    // PostgreSQL: date + INTERVAL 'val UNIT'
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String(format!(
                            "{} {}",
                            Self::expr_to_string(&arg1),
                            unit_str
                        )))),
                        unit: None,
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date)
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::ClickHouse) {
                    // ClickHouse: DATE_ADD(UNIT, val, date)
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: keep DATEADD with uppercased unit
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
26846
            // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
            "DATE_ADD" if args.len() == 3 => {
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = get_unit_str(&arg0);

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date) — unit as a string literal.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
                    ))));
                }

                if matches!(
                    target,
                    DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
                ) {
                    // DATEADD(UNIT, val, date); TSQL additionally casts to DATETIME2.
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    let date = if matches!(target, DialectType::TSQL) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL val UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
                    // NOTE(review): identical to the default branch below; kept separate,
                    // presumably for future divergence — confirm.
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: DATE_ADD(UNIT, val, date)
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
26913
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT' — value and unit folded
                        // into one quoted interval literal.
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Literal::String(unit_str)),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(val_str)),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS for MONTH,
                        // and a 2-arg DATE_ADD(date, INTERVAL …) for all other units.
                        match unit_str.as_str() {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Literal::String(val_str)),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) with no cast.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date)
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
27078
27079 // ADD_MONTHS(date, val) -> target-specific form
27080 "ADD_MONTHS" if args.len() == 2 => {
27081 let date = args.remove(0);
27082 let val = args.remove(0);
27083
27084 if matches!(target, DialectType::TSQL) {
27085 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27086 let cast_date = Self::ensure_cast_datetime2(date);
27087 return Ok(Expression::Function(Box::new(Function::new(
27088 "DATEADD".to_string(),
27089 vec![
27090 Expression::Identifier(Identifier::new("MONTH")),
27091 val,
27092 cast_date,
27093 ],
27094 ))));
27095 }
27096
27097 if matches!(target, DialectType::DuckDB) {
27098 // DuckDB: date + INTERVAL val MONTH
27099 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27100 this: Some(val),
27101 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27102 unit: crate::expressions::IntervalUnit::Month,
27103 use_plural: false,
27104 }),
27105 }));
27106 return Ok(Expression::Add(Box::new(
27107 crate::expressions::BinaryOp::new(date, interval),
27108 )));
27109 }
27110
27111 if matches!(target, DialectType::Snowflake) {
27112 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27113 if matches!(source, DialectType::Snowflake) {
27114 return Ok(Expression::Function(Box::new(Function::new(
27115 "ADD_MONTHS".to_string(),
27116 vec![date, val],
27117 ))));
27118 }
27119 return Ok(Expression::Function(Box::new(Function::new(
27120 "DATEADD".to_string(),
27121 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27122 ))));
27123 }
27124
27125 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27126 // Spark: ADD_MONTHS(date, val) - keep as is
27127 return Ok(Expression::Function(Box::new(Function::new(
27128 "ADD_MONTHS".to_string(),
27129 vec![date, val],
27130 ))));
27131 }
27132
27133 if matches!(target, DialectType::Hive) {
27134 return Ok(Expression::Function(Box::new(Function::new(
27135 "ADD_MONTHS".to_string(),
27136 vec![date, val],
27137 ))));
27138 }
27139
27140 if matches!(
27141 target,
27142 DialectType::Presto | DialectType::Trino | DialectType::Athena
27143 ) {
27144 // Presto: DATE_ADD('MONTH', val, date)
27145 return Ok(Expression::Function(Box::new(Function::new(
27146 "DATE_ADD".to_string(),
27147 vec![
27148 Expression::Literal(Literal::String("MONTH".to_string())),
27149 val,
27150 date,
27151 ],
27152 ))));
27153 }
27154
27155 // Default: keep ADD_MONTHS
27156 Ok(Expression::Function(Box::new(Function::new(
27157 "ADD_MONTHS".to_string(),
27158 vec![date, val],
27159 ))))
27160 }
27161
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            //
            // BigQuery SAFE_DIVIDE returns NULL instead of erroring on a zero divisor;
            // each target gets an equivalent NULL-guarded division.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions, so the
                // generated arithmetic keeps its intended precedence.
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared guard expression: y <> 0.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                // Plain x / y, reused by targets that need no explicit cast.
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL casts x to DOUBLE PRECISION so integer / integer
                        // division does not truncate.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                }
            }
27264
27265 // GENERATE_UUID() -> UUID() with CAST to string
27266 "GENERATE_UUID" => {
27267 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
27268 this: None,
27269 name: None,
27270 is_string: None,
27271 }));
27272 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
27273 let cast_type = match target {
27274 DialectType::DuckDB => Some(DataType::Text),
27275 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
27276 length: None,
27277 parenthesized_length: false,
27278 }),
27279 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
27280 Some(DataType::String { length: None })
27281 }
27282 _ => None,
27283 };
27284 if let Some(dt) = cast_type {
27285 Ok(Expression::Cast(Box::new(Cast {
27286 this: uuid_expr,
27287 to: dt,
27288 trailing_comments: vec![],
27289 double_colon_syntax: false,
27290 format: None,
27291 default: None,
27292 })))
27293 } else {
27294 Ok(uuid_expr)
27295 }
27296 }
27297
27298 // COUNTIF(x) -> CountIf expression
27299 "COUNTIF" if args.len() == 1 => {
27300 let arg = args.remove(0);
27301 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
27302 this: arg,
27303 distinct: false,
27304 filter: None,
27305 order_by: vec![],
27306 name: None,
27307 ignore_nulls: None,
27308 having_max: None,
27309 limit: None,
27310 })))
27311 }
27312
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            //
            // BigQuery EDIT_DISTANCE optionally takes `max_distance => N`; named
            // arguments are flattened to positional before mapping.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Plain two-argument form maps to the generic Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two usable arguments: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
27398
            // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
            // Scale 0 marks the argument as whole seconds since the Unix epoch;
            // the target dialect's generator renders the appropriate conversion.
            "TIMESTAMP_SECONDS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(0),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
            // Scale 3 = milliseconds (10^3 fractional digits).
            "TIMESTAMP_MILLIS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(3),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
            // Scale 6 = microseconds (10^6 fractional digits).
            "TIMESTAMP_MICROS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(6),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // DIV(x, y) -> IntDiv expression (normalized integer division node;
            // each generator emits its own integer-division syntax).
            "DIV" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::IntDiv(Box::new(
                    crate::expressions::BinaryFunc {
                        this: x,
                        expression: y,
                        original_name: None,
                    },
                )))
            }
27459
            // TO_HEX(x) -> target-specific form
            // Rewrites BigQuery's TO_HEX depending on the target dialect, with
            // special handling when the inner expression is itself a hash call
            // (those already/natively produce hex or binary in some engines).
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest size as a second argument.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            // Unreachable in practice: inner_returns_hex restricts the
                            // name to the four variants above. Kept as a safe fallback.
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // NOTE(review): Snowflake fallback uses LOWER(HEX(x)); confirm
                        // HEX is valid on the Snowflake target (this path is only hit
                        // when arg is not a Function despite inner_returns_hex).
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino have TO_HEX but return uppercase; wrap in LOWER
                    // to match BigQuery's lowercase output.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Default: LOWER(HEX(x)).
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }

            // LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is dropped
            // for all targets (MONTH is the implicit default everywhere this maps to).
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
27547
            // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
            // All three arguments are optional at the AST level; missing ones map to None.
            "GENERATE_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();
                Ok(Expression::GenerateSeries(Box::new(
                    crate::expressions::GenerateSeries {
                        start: start.map(Box::new),
                        end: end.map(Box::new),
                        step: step.map(Box::new),
                        is_end_exclusive: None,
                    },
                )))
            }

            // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
            "GENERATE_TIMESTAMP_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
                    // Only cast string literals - leave columns/expressions as-is
                    let maybe_cast_ts = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_ts);
                    let cast_end = end.map(maybe_cast_ts);
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                } else {
                    // Other targets: pass the arguments through unchanged.
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
27610
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            // Spark's TO_JSON serializes a value to a JSON string; each target gets
            // its closest equivalent. Missing arguments degrade to NULL rather than
            // panicking.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        // JSON is not a first-class DataType variant, so a Custom type
                        // is used to render `CAST(x AS JSON)` verbatim.
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        // DuckDB's TO_JSON yields a JSON value; cast to TEXT to match
                        // the string-returning semantics of the source function.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }

            // TO_JSON_STRING(x) -> target-specific
            // BigQuery's TO_JSON_STRING; the Presto/Trino and DuckDB mappings mirror
            // the TO_JSON arm above since both produce a JSON string.
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
27724
            // SAFE_ADD(x, y) -> SafeAdd expression
            // BigQuery SAFE_* arithmetic returns NULL on overflow instead of erroring;
            // dedicated AST nodes let each generator pick an equivalent construct.
            "SAFE_ADD" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
                    this: Box::new(x),
                    expression: Box::new(y),
                })))
            }

            // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
            "SAFE_SUBTRACT" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeSubtract(Box::new(
                    crate::expressions::SafeSubtract {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
            "SAFE_MULTIPLY" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeMultiply(Box::new(
                    crate::expressions::SafeMultiply {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
            // Normalized node; generators render REGEXP_LIKE / RLIKE / ~ as appropriate.
            "REGEXP_CONTAINS" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                Ok(Expression::RegexpLike(Box::new(
                    crate::expressions::RegexpFunc {
                        this: str_expr,
                        pattern,
                        flags: None,
                    },
                )))
            }

            // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
            // LOWER on both sides reproduces CONTAINS_SUBSTR's case-insensitive match.
            "CONTAINS_SUBSTR" if args.len() == 2 => {
                let a = args.remove(0);
                let b = args.remove(0);
                let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
                let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![lower_a, lower_b],
                ))))
            }
27783
            // INT64(x) -> CAST(x AS BIGINT)
            // BigQuery's INT64() JSON accessor is approximated with a plain cast.
            "INT64" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::BigInt { length: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }

            // INSTR(str, substr) -> target-specific
            // Note: any arguments beyond the first two are dropped.
            "INSTR" if args.len() >= 2 => {
                let str_expr = args.remove(0);
                let substr = args.remove(0);
                if matches!(target, DialectType::Snowflake) {
                    // CHARINDEX(substr, str) — argument order is swapped vs INSTR.
                    Ok(Expression::Function(Box::new(Function::new(
                        "CHARINDEX".to_string(),
                        vec![substr, str_expr],
                    ))))
                } else if matches!(target, DialectType::BigQuery) {
                    // Keep as INSTR
                    // NOTE(review): this branch is identical to the default below;
                    // it only exists to make the BigQuery passthrough explicit.
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        vec![str_expr, substr],
                    ))))
                } else {
                    // Default: keep as INSTR
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        vec![str_expr, substr],
                    ))))
                }
            }
27821
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            // BigQuery takes the expression first and a bare unit keyword second; most
            // other engines take a quoted unit string first.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
27854
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            // Both share this arm; `name` (from the enclosing scope) distinguishes
            // them where the rewrite differs (string-literal casting for DATETIME_TRUNC).
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        // "Coarse" units are those where the timezone can shift the
                        // truncation boundary (day-level and larger).
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                // Inner AT TIME ZONE converts into the zone before
                                // truncating; outer converts the result back.
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // Note: a timezone argument, if present, is dropped here.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
27953
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            // Dispatches on arity: 3 args = constructor from parts, 1 arg = cast,
            // 2 args = value + timezone, anything else passes through unchanged.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // Pad with fractions=0 and precision=0, which TIMEFROMPARTS requires.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28046
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' as CAST('x' AS TIME) so the generated
                            // SQL avoids the TIME literal syntax.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BQ->BQ passthrough for every other arity.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // Single arg: plain cast to (timezone-less) TIMESTAMP.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE + TIME addition yields a datetime in the targets this
                        // rewrite is aimed at; the outer cast pins the result type.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28198
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // Single arg: cast to a timezone-aware timestamp.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Other targets: CAST(x AS TIMESTAMP) AT TIME ZONE tz.
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
28250
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            // The cast's target type is TEXT for DuckDB and VARCHAR elsewhere.
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // The double AT TIME ZONE first anchors the value to UTC,
                        // then converts it into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
28338
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            // Inverse of the TIMESTAMP_* constructors above: timestamp -> epoch integer.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        // Outer cast pins the result to BIGINT to match BigQuery's
                        // integer return type.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
28424
            // ARRAY_CONCAT / LIST_CONCAT -> target-specific
            // Variadic array concatenation. Targets whose concat function is binary
            // (ARRAY_CAT) get a right-associative fold: a, b, c -> CAT(a, CAT(b, c)).
            "ARRAY_CONCAT" | "LIST_CONCAT" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // CONCAT(arr1, arr2, ...)
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONCAT".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // CONCAT(arr1, arr2, ...)
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONCAT".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Snowflake => {
                        // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
                        if args.len() == 1 {
                            // ARRAY_CAT requires 2 args, add empty array as []
                            let empty_arr = Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: vec![],
                                    bracket_notation: true,
                                    use_list_keyword: false,
                                },
                            ));
                            let mut new_args = args;
                            new_args.push(empty_arr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CAT".to_string(),
                                new_args,
                            ))))
                        } else if args.is_empty() {
                            // Degenerate zero-arg call: emit ARRAY_CAT() as-is.
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CAT".to_string(),
                                args,
                            ))))
                        } else {
                            // Fold right-to-left into nested binary ARRAY_CAT calls.
                            let mut it = args.into_iter().rev();
                            let mut result = it.next().unwrap();
                            for arr in it {
                                result = Expression::Function(Box::new(Function::new(
                                    "ARRAY_CAT".to_string(),
                                    vec![arr, result],
                                )));
                            }
                            Ok(result)
                        }
                    }
                    DialectType::PostgreSQL => {
                        // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
                        if args.len() <= 1 {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CAT".to_string(),
                                args,
                            ))))
                        } else {
                            let mut it = args.into_iter().rev();
                            let mut result = it.next().unwrap();
                            for arr in it {
                                result = Expression::Function(Box::new(Function::new(
                                    "ARRAY_CAT".to_string(),
                                    vec![arr, result],
                                )));
                            }
                            Ok(result)
                        }
                    }
                    DialectType::Redshift => {
                        // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
                        // Up to two args pass through; more than two are folded.
                        if args.len() <= 2 {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CONCAT".to_string(),
                                args,
                            ))))
                        } else {
                            let mut it = args.into_iter().rev();
                            let mut result = it.next().unwrap();
                            for arr in it {
                                result = Expression::Function(Box::new(Function::new(
                                    "ARRAY_CONCAT".to_string(),
                                    vec![arr, result],
                                )));
                            }
                            Ok(result)
                        }
                    }
                    DialectType::DuckDB => {
                        // LIST_CONCAT supports multiple args natively in DuckDB
                        Ok(Expression::Function(Box::new(Function::new(
                            "LIST_CONCAT".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT".to_string(),
                        args,
                    )))),
                }
            }
28527
28528 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
28529 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
28530 let arg = args.remove(0);
28531 match target {
28532 DialectType::Snowflake => {
28533 let array_agg =
28534 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
28535 this: arg,
28536 distinct: false,
28537 filter: None,
28538 order_by: vec![],
28539 name: None,
28540 ignore_nulls: None,
28541 having_max: None,
28542 limit: None,
28543 }));
28544 Ok(Expression::Function(Box::new(Function::new(
28545 "ARRAY_FLATTEN".to_string(),
28546 vec![array_agg],
28547 ))))
28548 }
28549 _ => Ok(Expression::Function(Box::new(Function::new(
28550 "ARRAY_CONCAT_AGG".to_string(),
28551 vec![arg],
28552 )))),
28553 }
28554 }
28555
28556 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
28557 "MD5" if args.len() == 1 => {
28558 let arg = args.remove(0);
28559 match target {
28560 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28561 // UNHEX(MD5(x))
28562 let md5 = Expression::Function(Box::new(Function::new(
28563 "MD5".to_string(),
28564 vec![arg],
28565 )));
28566 Ok(Expression::Function(Box::new(Function::new(
28567 "UNHEX".to_string(),
28568 vec![md5],
28569 ))))
28570 }
28571 DialectType::Snowflake => {
28572 // MD5_BINARY(x)
28573 Ok(Expression::Function(Box::new(Function::new(
28574 "MD5_BINARY".to_string(),
28575 vec![arg],
28576 ))))
28577 }
28578 _ => Ok(Expression::Function(Box::new(Function::new(
28579 "MD5".to_string(),
28580 vec![arg],
28581 )))),
28582 }
28583 }
28584
28585 "SHA1" if args.len() == 1 => {
28586 let arg = args.remove(0);
28587 match target {
28588 DialectType::DuckDB => {
28589 // UNHEX(SHA1(x))
28590 let sha1 = Expression::Function(Box::new(Function::new(
28591 "SHA1".to_string(),
28592 vec![arg],
28593 )));
28594 Ok(Expression::Function(Box::new(Function::new(
28595 "UNHEX".to_string(),
28596 vec![sha1],
28597 ))))
28598 }
28599 _ => Ok(Expression::Function(Box::new(Function::new(
28600 "SHA1".to_string(),
28601 vec![arg],
28602 )))),
28603 }
28604 }
28605
28606 "SHA256" if args.len() == 1 => {
28607 let arg = args.remove(0);
28608 match target {
28609 DialectType::DuckDB => {
28610 // UNHEX(SHA256(x))
28611 let sha = Expression::Function(Box::new(Function::new(
28612 "SHA256".to_string(),
28613 vec![arg],
28614 )));
28615 Ok(Expression::Function(Box::new(Function::new(
28616 "UNHEX".to_string(),
28617 vec![sha],
28618 ))))
28619 }
28620 DialectType::Snowflake => {
28621 // SHA2_BINARY(x, 256)
28622 Ok(Expression::Function(Box::new(Function::new(
28623 "SHA2_BINARY".to_string(),
28624 vec![arg, Expression::number(256)],
28625 ))))
28626 }
28627 DialectType::Redshift | DialectType::Spark => {
28628 // SHA2(x, 256)
28629 Ok(Expression::Function(Box::new(Function::new(
28630 "SHA2".to_string(),
28631 vec![arg, Expression::number(256)],
28632 ))))
28633 }
28634 _ => Ok(Expression::Function(Box::new(Function::new(
28635 "SHA256".to_string(),
28636 vec![arg],
28637 )))),
28638 }
28639 }
28640
28641 "SHA512" if args.len() == 1 => {
28642 let arg = args.remove(0);
28643 match target {
28644 DialectType::Snowflake => {
28645 // SHA2_BINARY(x, 512)
28646 Ok(Expression::Function(Box::new(Function::new(
28647 "SHA2_BINARY".to_string(),
28648 vec![arg, Expression::number(512)],
28649 ))))
28650 }
28651 DialectType::Redshift | DialectType::Spark => {
28652 // SHA2(x, 512)
28653 Ok(Expression::Function(Box::new(Function::new(
28654 "SHA2".to_string(),
28655 vec![arg, Expression::number(512)],
28656 ))))
28657 }
28658 _ => Ok(Expression::Function(Box::new(Function::new(
28659 "SHA512".to_string(),
28660 vec![arg],
28661 )))),
28662 }
28663 }
28664
// REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg.
// Dialects disagree on the default group index (whole match vs first
// capture), so we inspect the pattern and add/omit the index per target.
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Check if pattern contains capturing groups (parentheses).
    // NOTE(review): heuristic — escaped parens or parens inside a char
    // class also trip this, and non-literal patterns are assumed to be
    // group-free; confirm acceptable for the supported inputs.
    let has_groups = match &pattern {
        Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB's 3rd arg selects the group: 1 = first capture,
            // 0 = whole match. Always written out explicitly.
            let group = if has_groups {
                Expression::number(1)
            } else {
                Expression::number(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                vec![str_expr, pattern, group],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            } else {
                // No capture groups: request the whole match explicitly.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(0)],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino default to the whole match, so the explicit
            // group index is only needed when captures are present.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(1)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // position=1, occurrence=1, 'c' = case-sensitive params,
                // trailing 1 = group index.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![
                        str_expr,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        Expression::Literal(Literal::String("c".to_string())),
                        Expression::number(1),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        // Other targets: keep the two-argument form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![str_expr, pattern],
        )))),
    }
}
28742
28743 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
28744 "MOD" if args.len() == 2 => {
28745 match target {
28746 DialectType::PostgreSQL
28747 | DialectType::DuckDB
28748 | DialectType::Presto
28749 | DialectType::Trino
28750 | DialectType::Athena
28751 | DialectType::Snowflake => {
28752 let x = args.remove(0);
28753 let y = args.remove(0);
28754 // Wrap complex expressions in parens to preserve precedence
28755 let needs_paren = |e: &Expression| {
28756 matches!(
28757 e,
28758 Expression::Add(_)
28759 | Expression::Sub(_)
28760 | Expression::Mul(_)
28761 | Expression::Div(_)
28762 )
28763 };
28764 let x = if needs_paren(&x) {
28765 Expression::Paren(Box::new(crate::expressions::Paren {
28766 this: x,
28767 trailing_comments: vec![],
28768 }))
28769 } else {
28770 x
28771 };
28772 let y = if needs_paren(&y) {
28773 Expression::Paren(Box::new(crate::expressions::Paren {
28774 this: y,
28775 trailing_comments: vec![],
28776 }))
28777 } else {
28778 y
28779 };
28780 Ok(Expression::Mod(Box::new(
28781 crate::expressions::BinaryOp::new(x, y),
28782 )))
28783 }
28784 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28785 // Hive/Spark: a % b
28786 let x = args.remove(0);
28787 let y = args.remove(0);
28788 let needs_paren = |e: &Expression| {
28789 matches!(
28790 e,
28791 Expression::Add(_)
28792 | Expression::Sub(_)
28793 | Expression::Mul(_)
28794 | Expression::Div(_)
28795 )
28796 };
28797 let x = if needs_paren(&x) {
28798 Expression::Paren(Box::new(crate::expressions::Paren {
28799 this: x,
28800 trailing_comments: vec![],
28801 }))
28802 } else {
28803 x
28804 };
28805 let y = if needs_paren(&y) {
28806 Expression::Paren(Box::new(crate::expressions::Paren {
28807 this: y,
28808 trailing_comments: vec![],
28809 }))
28810 } else {
28811 y
28812 };
28813 Ok(Expression::Mod(Box::new(
28814 crate::expressions::BinaryOp::new(x, y),
28815 )))
28816 }
28817 _ => Ok(Expression::Function(Box::new(Function::new(
28818 "MOD".to_string(),
28819 args,
28820 )))),
28821 }
28822 }
28823
28824 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
28825 "ARRAY_FILTER" if args.len() == 2 => {
28826 let name = match target {
28827 DialectType::DuckDB => "LIST_FILTER",
28828 DialectType::StarRocks => "ARRAY_FILTER",
28829 _ => "FILTER",
28830 };
28831 Ok(Expression::Function(Box::new(Function::new(
28832 name.to_string(),
28833 args,
28834 ))))
28835 }
28836 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
28837 "FILTER" if args.len() == 2 => {
28838 let name = match target {
28839 DialectType::DuckDB => "LIST_FILTER",
28840 DialectType::StarRocks => "ARRAY_FILTER",
28841 _ => "FILTER",
28842 };
28843 Ok(Expression::Function(Box::new(Function::new(
28844 name.to_string(),
28845 args,
28846 ))))
28847 }
28848 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
28849 "REDUCE" if args.len() >= 3 => {
28850 let name = match target {
28851 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
28852 _ => "REDUCE",
28853 };
28854 Ok(Expression::Function(Box::new(Function::new(
28855 name.to_string(),
28856 args,
28857 ))))
28858 }
28859 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
28860 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
28861 Function::new("ARRAY_REVERSE".to_string(), args),
28862 ))),
28863
// CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args.
// NOTE(review): DuckDB's `||` propagates NULL while DuckDB's own CONCAT
// skips NULLs — the `||` form presumably matches the NULL-propagating
// semantics of the source dialect's CONCAT; confirm against callers.
"CONCAT" if args.len() > 2 => match target {
    DialectType::DuckDB => {
        // Left-fold into a chain of DPipe (||) nodes: ((a || b) || c)...
        let mut it = args.into_iter();
        let mut result = it.next().unwrap();
        for arg in it {
            result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                this: Box::new(result),
                expression: Box::new(arg),
                safe: None,
            }));
        }
        Ok(result)
    }
    // Other targets keep the variadic CONCAT call.
    _ => Ok(Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        args,
    )))),
},
28883
// GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific.
// BigQuery and Snowflake keep the function name (with the step defaulted
// to INTERVAL '1' DAY); DuckDB lowers to a casted GENERATE_SERIES; all
// other targets get a bare GenerateSeries node.
"GENERATE_DATE_ARRAY" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: add default interval if not present
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            // Implicit step is one day: INTERVAL '1' DAY.
            let default_interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String("1".to_string()))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                vec![start, end, default_interval],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                args,
            ))))
        }
    } else if matches!(target, DialectType::DuckDB) {
        // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        // Missing step defaults to INTERVAL '1' DAY here as well.
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String("1".to_string()))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                },
            )))
        });

        // Wrap start/end in CAST(... AS DATE) only for string literals
        // (non-literal expressions are assumed to already be dates).
        let maybe_cast_date = |expr: Expression| -> Expression {
            if matches!(&expr, Expression::Literal(Literal::String(_))) {
                Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                expr
            }
        };
        let cast_start = start.map(maybe_cast_date);
        let cast_end = end.map(maybe_cast_date);

        let gen_series =
            Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                start: cast_start.map(Box::new),
                end: cast_end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            }));

        // Wrap in CAST(... AS DATE[]) so the result element type is DATE,
        // since GENERATE_SERIES over timestamps would yield timestamps.
        Ok(Expression::Cast(Box::new(Cast {
            this: gen_series,
            to: DataType::Array {
                element_type: Box::new(DataType::Date),
                dimension: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    } else if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
        // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            let default_interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String("1".to_string()))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                vec![start, end, default_interval],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                args,
            ))))
        }
    } else {
        // Convert to GenerateSeries for other targets; each generator
        // renders this node in its own native spelling.
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String("1".to_string()))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                },
            )))
        });
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    }
}
29012
// PARSE_DATE(format, str) -> target-specific.
// Note the argument order: the source function takes (format, str), but
// DuckDB's STRPTIME and the Snowflake marker both take (str, format).
"PARSE_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(STRPTIME(str, duck_format) AS DATE) — STRPTIME yields a
            // timestamp, so the cast truncates it to a date.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: strptime,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // _POLYGLOT_DATE(str, snowflake_format)
            // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "_POLYGLOT_DATE".to_string(),
                vec![str_expr, sf_format],
            ))))
        }
        // Other targets keep PARSE_DATE with the original (format, str) order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "PARSE_DATE".to_string(),
            vec![format, str_expr],
        )))),
    }
}
29049
// PARSE_TIMESTAMP(format, str[, tz]) -> target-specific.
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    // Optional third argument is a timezone name.
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            // STRPTIME(str, duck_format)
            // NOTE(review): the optional tz argument is dropped on this
            // path — STRPTIME receives only (str, format); confirm this
            // is intentional for zoned inputs.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(strptime)
        }
        _ => {
            // Other targets: re-emit PARSE_TIMESTAMP, preserving the tz
            // argument when it was supplied.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz {
                result_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "PARSE_TIMESTAMP".to_string(),
                result_args,
            ))))
        }
    }
}
29080
29081 // FORMAT_DATE(format, date) -> target-specific
29082 "FORMAT_DATE" if args.len() == 2 => {
29083 let format = args.remove(0);
29084 let date_expr = args.remove(0);
29085 match target {
29086 DialectType::DuckDB => {
29087 // STRFTIME(CAST(date AS DATE), format)
29088 let cast_date = Expression::Cast(Box::new(Cast {
29089 this: date_expr,
29090 to: DataType::Date,
29091 trailing_comments: vec![],
29092 double_colon_syntax: false,
29093 format: None,
29094 default: None,
29095 }));
29096 Ok(Expression::Function(Box::new(Function::new(
29097 "STRFTIME".to_string(),
29098 vec![cast_date, format],
29099 ))))
29100 }
29101 _ => Ok(Expression::Function(Box::new(Function::new(
29102 "FORMAT_DATE".to_string(),
29103 vec![format, date_expr],
29104 )))),
29105 }
29106 }
29107
// FORMAT_DATETIME(format, datetime) -> target-specific.
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals: a typed
        // DATETIME '...' literal is lowered to CAST('...' AS DATETIME).
        let norm_dt = match dt_expr {
            Expression::Literal(Literal::Timestamp(s)) => {
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            }
            other => other,
        };
        // Early return exits the enclosing transform for the BQ->BQ case.
        return Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![norm_format, norm_dt],
        ))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_dt, duck_format],
            ))))
        }
        // Other targets keep FORMAT_DATETIME unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![format, dt_expr],
        )))),
    }
}
29154
29155 // FORMAT_TIMESTAMP(format, ts) -> target-specific
29156 "FORMAT_TIMESTAMP" if args.len() == 2 => {
29157 let format = args.remove(0);
29158 let ts_expr = args.remove(0);
29159 match target {
29160 DialectType::DuckDB => {
29161 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
29162 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
29163 let cast_ts = Expression::Cast(Box::new(Cast {
29164 this: cast_tstz,
29165 to: DataType::Timestamp {
29166 timezone: false,
29167 precision: None,
29168 },
29169 trailing_comments: vec![],
29170 double_colon_syntax: false,
29171 format: None,
29172 default: None,
29173 }));
29174 Ok(Expression::Function(Box::new(Function::new(
29175 "STRFTIME".to_string(),
29176 vec![cast_ts, format],
29177 ))))
29178 }
29179 DialectType::Snowflake => {
29180 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
29181 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
29182 let cast_ts = Expression::Cast(Box::new(Cast {
29183 this: cast_tstz,
29184 to: DataType::Timestamp {
29185 timezone: false,
29186 precision: None,
29187 },
29188 trailing_comments: vec![],
29189 double_colon_syntax: false,
29190 format: None,
29191 default: None,
29192 }));
29193 let sf_format = Self::bq_format_to_snowflake(&format);
29194 Ok(Expression::Function(Box::new(Function::new(
29195 "TO_CHAR".to_string(),
29196 vec![cast_ts, sf_format],
29197 ))))
29198 }
29199 _ => Ok(Expression::Function(Box::new(Function::new(
29200 "FORMAT_TIMESTAMP".to_string(),
29201 vec![format, ts_expr],
29202 )))),
29203 }
29204 }
29205
// UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB,
// i.e. the number of whole days since the Unix epoch.
"UNIX_DATE" if args.len() == 1 => {
    let date = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // Epoch anchor: CAST('1970-01-01' AS DATE).
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
            // Need to convert DATE literal to CAST
            let norm_date = Self::date_literal_to_cast(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Literal::String("DAY".to_string())),
                    epoch,
                    norm_date,
                ],
            ))))
        }
        // Other targets keep UNIX_DATE unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_DATE".to_string(),
            vec![date],
        )))),
    }
}
29237
// UNIX_SECONDS(ts) -> target-specific seconds-since-epoch.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // The outer cast pins the result to an integer type, since
            // EPOCH can yield a non-integral value.
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![norm_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            // NOTE(review): the unit is emitted as the identifier
            // `SECONDS` (plural) — assumed accepted by Snowflake's
            // TIMESTAMPDIFF; confirm.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                )),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMPDIFF".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("SECONDS".to_string())),
                    epoch,
                    ts,
                ],
            ))))
        }
        // Other targets keep UNIX_SECONDS unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}
29288
29289 // UNIX_MILLIS(ts) -> target-specific
29290 "UNIX_MILLIS" if args.len() == 1 => {
29291 let ts = args.remove(0);
29292 match target {
29293 DialectType::DuckDB => {
29294 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29295 Ok(Expression::Function(Box::new(Function::new(
29296 "EPOCH_MS".to_string(),
29297 vec![norm_ts],
29298 ))))
29299 }
29300 _ => Ok(Expression::Function(Box::new(Function::new(
29301 "UNIX_MILLIS".to_string(),
29302 vec![ts],
29303 )))),
29304 }
29305 }
29306
29307 // UNIX_MICROS(ts) -> target-specific
29308 "UNIX_MICROS" if args.len() == 1 => {
29309 let ts = args.remove(0);
29310 match target {
29311 DialectType::DuckDB => {
29312 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29313 Ok(Expression::Function(Box::new(Function::new(
29314 "EPOCH_US".to_string(),
29315 vec![norm_ts],
29316 ))))
29317 }
29318 _ => Ok(Expression::Function(Box::new(Function::new(
29319 "UNIX_MICROS".to_string(),
29320 vec![ts],
29321 )))),
29322 }
29323 }
29324
29325 // INSTR(str, substr) -> target-specific
29326 "INSTR" => {
29327 if matches!(target, DialectType::BigQuery) {
29328 // BQ->BQ: keep as INSTR
29329 Ok(Expression::Function(Box::new(Function::new(
29330 "INSTR".to_string(),
29331 args,
29332 ))))
29333 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
29334 // Snowflake: CHARINDEX(substr, str) - swap args
29335 let str_expr = args.remove(0);
29336 let substr = args.remove(0);
29337 Ok(Expression::Function(Box::new(Function::new(
29338 "CHARINDEX".to_string(),
29339 vec![substr, str_expr],
29340 ))))
29341 } else {
29342 // Keep as INSTR for other targets
29343 Ok(Expression::Function(Box::new(Function::new(
29344 "INSTR".to_string(),
29345 args,
29346 ))))
29347 }
29348 }
29349
// CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone.
// Three concerns: BQ keeps the parenthesized function form; a timezone
// argument to CURRENT_DATE is lowered per target; and some targets want
// the bare (no-parens) keyword spelling.
"CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: always output with parens (function form), keep any timezone arg
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    } else if name == "CURRENT_DATE" && args.len() == 1 {
        // CURRENT_DATE('UTC') - has timezone arg
        let tz_arg = args.remove(0);
        match target {
            DialectType::DuckDB => {
                // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                let ct = Expression::CurrentTimestamp(
                    crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: false,
                    },
                );
                let at_tz =
                    Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: ct,
                        zone: tz_arg,
                    }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: at_tz,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            DialectType::Snowflake => {
                // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                let ct = Expression::Function(Box::new(Function::new(
                    "CURRENT_TIMESTAMP".to_string(),
                    vec![],
                )));
                let convert = Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![tz_arg, ct],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: convert,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            _ => {
                // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                Ok(Expression::AtTimeZone(Box::new(
                    crate::expressions::AtTimeZone {
                        this: cd,
                        zone: tz_arg,
                    },
                )))
            }
        }
    } else if (name == "CURRENT_TIMESTAMP"
        || name == "CURRENT_TIME"
        || name == "CURRENT_DATE")
        && args.is_empty()
        && matches!(
            target,
            DialectType::PostgreSQL
                | DialectType::DuckDB
                | DialectType::Presto
                | DialectType::Trino
        )
    {
        // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME,
        // so emit the dedicated AST node instead of a Function call.
        if name == "CURRENT_TIMESTAMP" {
            Ok(Expression::CurrentTimestamp(
                crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                },
            ))
        } else if name == "CURRENT_DATE" {
            Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
        } else {
            // CURRENT_TIME
            Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                precision: None,
            }))
        }
    } else {
        // All other targets (and CURRENT_DATETIME): keep as function (with parens)
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    }
}
29444
// JSON_QUERY(json, path) -> target-specific.
"JSON_QUERY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow extraction operator).
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Hive-family spelling: GET_JSON_OBJECT(json, path).
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        // Postgres-family spelling: JSON_EXTRACT_PATH(json, path).
        DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
            Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
        )),
        // Other targets keep JSON_QUERY unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_QUERY".to_string(),
            args,
        )))),
    }
}
29481
// JSON_VALUE_ARRAY(json, path) -> target-specific: extract a JSON array
// and materialize it as an array of strings.
"JSON_VALUE_ARRAY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // CAST(json -> path AS TEXT[])
            let json_expr = args.remove(0);
            let path = args.remove(0);
            let arrow = Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: arrow,
                to: DataType::Array {
                    element_type: Box::new(DataType::Text),
                    dimension: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::Snowflake => {
            // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
            let json_expr = args.remove(0);
            let path_expr = args.remove(0);
            // Convert JSON path from $.path to just path — GET_PATH uses
            // bare dotted paths, not JSONPath. Only literal paths are
            // rewritten; dynamic path expressions pass through as-is.
            let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
            {
                let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                Expression::Literal(Literal::String(trimmed.to_string()))
            } else {
                path_expr
            };
            let parse_json = Expression::Function(Box::new(Function::new(
                "PARSE_JSON".to_string(),
                vec![json_expr],
            )));
            let get_path = Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![parse_json, sf_path],
            )));
            // TRANSFORM(get_path, x -> CAST(x AS VARCHAR)) stringifies
            // each element of the extracted array.
            let cast_expr = Expression::Cast(Box::new(Cast {
                this: Expression::Identifier(Identifier::new("x")),
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![Identifier::new("x")],
                body: cast_expr,
                colon: false,
                parameter_types: vec![],
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "TRANSFORM".to_string(),
                vec![get_path, lambda],
            ))))
        }
        // Other targets keep JSON_VALUE_ARRAY unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_VALUE_ARRAY".to_string(),
            args,
        )))),
    }
}
29562
29563 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
29564 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
29565 // This is different from Hive/Spark where 3rd arg is "group_index"
29566 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
29567 match target {
29568 DialectType::DuckDB
29569 | DialectType::Presto
29570 | DialectType::Trino
29571 | DialectType::Athena => {
29572 if args.len() == 2 {
29573 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
29574 args.push(Expression::number(1));
29575 Ok(Expression::Function(Box::new(Function::new(
29576 "REGEXP_EXTRACT".to_string(),
29577 args,
29578 ))))
29579 } else if args.len() == 3 {
29580 let val = args.remove(0);
29581 let regex = args.remove(0);
29582 let position = args.remove(0);
29583 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29584 if is_pos_1 {
29585 Ok(Expression::Function(Box::new(Function::new(
29586 "REGEXP_EXTRACT".to_string(),
29587 vec![val, regex, Expression::number(1)],
29588 ))))
29589 } else {
29590 let substring_expr = Expression::Function(Box::new(Function::new(
29591 "SUBSTRING".to_string(),
29592 vec![val, position],
29593 )));
29594 let nullif_expr = Expression::Function(Box::new(Function::new(
29595 "NULLIF".to_string(),
29596 vec![
29597 substring_expr,
29598 Expression::Literal(Literal::String(String::new())),
29599 ],
29600 )));
29601 Ok(Expression::Function(Box::new(Function::new(
29602 "REGEXP_EXTRACT".to_string(),
29603 vec![nullif_expr, regex, Expression::number(1)],
29604 ))))
29605 }
29606 } else if args.len() == 4 {
29607 let val = args.remove(0);
29608 let regex = args.remove(0);
29609 let position = args.remove(0);
29610 let occurrence = args.remove(0);
29611 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29612 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
29613 if is_pos_1 && is_occ_1 {
29614 Ok(Expression::Function(Box::new(Function::new(
29615 "REGEXP_EXTRACT".to_string(),
29616 vec![val, regex, Expression::number(1)],
29617 ))))
29618 } else {
29619 let subject = if is_pos_1 {
29620 val
29621 } else {
29622 let substring_expr = Expression::Function(Box::new(
29623 Function::new("SUBSTRING".to_string(), vec![val, position]),
29624 ));
29625 Expression::Function(Box::new(Function::new(
29626 "NULLIF".to_string(),
29627 vec![
29628 substring_expr,
29629 Expression::Literal(Literal::String(String::new())),
29630 ],
29631 )))
29632 };
29633 let extract_all = Expression::Function(Box::new(Function::new(
29634 "REGEXP_EXTRACT_ALL".to_string(),
29635 vec![subject, regex, Expression::number(1)],
29636 )));
29637 Ok(Expression::Function(Box::new(Function::new(
29638 "ARRAY_EXTRACT".to_string(),
29639 vec![extract_all, occurrence],
29640 ))))
29641 }
29642 } else {
29643 Ok(Expression::Function(Box::new(Function {
29644 name: f.name,
29645 args,
29646 distinct: f.distinct,
29647 trailing_comments: f.trailing_comments,
29648 use_bracket_syntax: f.use_bracket_syntax,
29649 no_parens: f.no_parens,
29650 quoted: f.quoted,
29651 })))
29652 }
29653 }
29654 DialectType::Snowflake => {
29655 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
29656 Ok(Expression::Function(Box::new(Function::new(
29657 "REGEXP_SUBSTR".to_string(),
29658 args,
29659 ))))
29660 }
29661 _ => {
29662 // For other targets (Hive/Spark/BigQuery): pass through as-is
29663 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
29664 Ok(Expression::Function(Box::new(Function {
29665 name: f.name,
29666 args,
29667 distinct: f.distinct,
29668 trailing_comments: f.trailing_comments,
29669 use_bracket_syntax: f.use_bracket_syntax,
29670 no_parens: f.no_parens,
29671 quoted: f.quoted,
29672 })))
29673 }
29674 }
29675 }
29676
29677 // BigQuery STRUCT(args) -> target-specific struct expression
29678 "STRUCT" => {
29679 // Convert Function args to Struct fields
29680 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
29681 for (i, arg) in args.into_iter().enumerate() {
29682 match arg {
29683 Expression::Alias(a) => {
29684 // Named field: expr AS name
29685 fields.push((Some(a.alias.name.clone()), a.this));
29686 }
29687 other => {
29688 // Unnamed field: for Spark/Hive, keep as None
29689 // For Snowflake, auto-name as _N
29690 // For DuckDB, use column name for column refs, _N for others
29691 if matches!(target, DialectType::Snowflake) {
29692 fields.push((Some(format!("_{}", i)), other));
29693 } else if matches!(target, DialectType::DuckDB) {
29694 let auto_name = match &other {
29695 Expression::Column(col) => col.name.name.clone(),
29696 _ => format!("_{}", i),
29697 };
29698 fields.push((Some(auto_name), other));
29699 } else {
29700 fields.push((None, other));
29701 }
29702 }
29703 }
29704 }
29705
29706 match target {
29707 DialectType::Snowflake => {
29708 // OBJECT_CONSTRUCT('name', value, ...)
29709 let mut oc_args = Vec::new();
29710 for (name, val) in &fields {
29711 if let Some(n) = name {
29712 oc_args.push(Expression::Literal(Literal::String(n.clone())));
29713 oc_args.push(val.clone());
29714 } else {
29715 oc_args.push(val.clone());
29716 }
29717 }
29718 Ok(Expression::Function(Box::new(Function::new(
29719 "OBJECT_CONSTRUCT".to_string(),
29720 oc_args,
29721 ))))
29722 }
29723 DialectType::DuckDB => {
29724 // {'name': value, ...}
29725 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29726 fields,
29727 })))
29728 }
29729 DialectType::Hive => {
29730 // STRUCT(val1, val2, ...) - strip aliases
29731 let hive_fields: Vec<(Option<String>, Expression)> =
29732 fields.into_iter().map(|(_, v)| (None, v)).collect();
29733 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29734 fields: hive_fields,
29735 })))
29736 }
29737 DialectType::Spark | DialectType::Databricks => {
29738 // Use Expression::Struct to bypass Spark target transform auto-naming
29739 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29740 fields,
29741 })))
29742 }
29743 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29744 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
29745 let all_named =
29746 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
29747 let all_types_inferable = all_named
29748 && fields
29749 .iter()
29750 .all(|(_, val)| Self::can_infer_presto_type(val));
29751 let row_args: Vec<Expression> =
29752 fields.iter().map(|(_, v)| v.clone()).collect();
29753 let row_expr = Expression::Function(Box::new(Function::new(
29754 "ROW".to_string(),
29755 row_args,
29756 )));
29757 if all_named && all_types_inferable {
29758 // Build ROW type with inferred types
29759 let mut row_type_fields = Vec::new();
29760 for (name, val) in &fields {
29761 if let Some(n) = name {
29762 let type_str = Self::infer_sql_type_for_presto(val);
29763 row_type_fields.push(crate::expressions::StructField::new(
29764 n.clone(),
29765 crate::expressions::DataType::Custom { name: type_str },
29766 ));
29767 }
29768 }
29769 let row_type = crate::expressions::DataType::Struct {
29770 fields: row_type_fields,
29771 nested: true,
29772 };
29773 Ok(Expression::Cast(Box::new(Cast {
29774 this: row_expr,
29775 to: row_type,
29776 trailing_comments: Vec::new(),
29777 double_colon_syntax: false,
29778 format: None,
29779 default: None,
29780 })))
29781 } else {
29782 Ok(row_expr)
29783 }
29784 }
29785 _ => {
29786 // Default: keep as STRUCT function with original args
29787 let mut new_args = Vec::new();
29788 for (name, val) in fields {
29789 if let Some(n) = name {
29790 new_args.push(Expression::Alias(Box::new(
29791 crate::expressions::Alias::new(val, Identifier::new(n)),
29792 )));
29793 } else {
29794 new_args.push(val);
29795 }
29796 }
29797 Ok(Expression::Function(Box::new(Function::new(
29798 "STRUCT".to_string(),
29799 new_args,
29800 ))))
29801 }
29802 }
29803 }
29804
29805 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
29806 "ROUND" if args.len() == 3 => {
29807 let x = args.remove(0);
29808 let n = args.remove(0);
29809 let mode = args.remove(0);
29810 // Check if mode is 'ROUND_HALF_EVEN'
29811 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
29812 if is_half_even && matches!(target, DialectType::DuckDB) {
29813 Ok(Expression::Function(Box::new(Function::new(
29814 "ROUND_EVEN".to_string(),
29815 vec![x, n],
29816 ))))
29817 } else {
29818 // Pass through with all args
29819 Ok(Expression::Function(Box::new(Function::new(
29820 "ROUND".to_string(),
29821 vec![x, n, mode],
29822 ))))
29823 }
29824 }
29825
29826 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
29827 "MAKE_INTERVAL" => {
29828 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
29829 // The positional args are: year, month
29830 // Named args are: day =>, minute =>, etc.
29831 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
29832 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
29833 // For BigQuery->BigQuery: reorder named args (day before minute)
29834 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
29835 let mut parts: Vec<(String, String)> = Vec::new();
29836 let mut pos_idx = 0;
29837 let pos_units = ["year", "month"];
29838 for arg in &args {
29839 if let Expression::NamedArgument(na) = arg {
29840 // Named arg like minute => 5
29841 let unit = na.name.name.clone();
29842 if let Expression::Literal(Literal::Number(n)) = &na.value {
29843 parts.push((unit, n.clone()));
29844 }
29845 } else if pos_idx < pos_units.len() {
29846 if let Expression::Literal(Literal::Number(n)) = arg {
29847 parts.push((pos_units[pos_idx].to_string(), n.clone()));
29848 }
29849 pos_idx += 1;
29850 }
29851 }
29852 // Don't sort - preserve original argument order
29853 let separator = if matches!(target, DialectType::Snowflake) {
29854 ", "
29855 } else {
29856 " "
29857 };
29858 let interval_str = parts
29859 .iter()
29860 .map(|(u, v)| format!("{} {}", v, u))
29861 .collect::<Vec<_>>()
29862 .join(separator);
29863 Ok(Expression::Interval(Box::new(
29864 crate::expressions::Interval {
29865 this: Some(Expression::Literal(Literal::String(interval_str))),
29866 unit: None,
29867 },
29868 )))
29869 } else if matches!(target, DialectType::BigQuery) {
29870 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
29871 let mut positional = Vec::new();
29872 let mut named: Vec<(
29873 String,
29874 Expression,
29875 crate::expressions::NamedArgSeparator,
29876 )> = Vec::new();
29877 let _pos_units = ["year", "month"];
29878 let mut _pos_idx = 0;
29879 for arg in args {
29880 if let Expression::NamedArgument(na) = arg {
29881 named.push((na.name.name.clone(), na.value, na.separator));
29882 } else {
29883 positional.push(arg);
29884 _pos_idx += 1;
29885 }
29886 }
29887 // Sort named args by: day, hour, minute, second
29888 let unit_order = |u: &str| -> usize {
29889 match u.to_lowercase().as_str() {
29890 "day" => 0,
29891 "hour" => 1,
29892 "minute" => 2,
29893 "second" => 3,
29894 _ => 4,
29895 }
29896 };
29897 named.sort_by_key(|(u, _, _)| unit_order(u));
29898 let mut result_args = positional;
29899 for (name, value, sep) in named {
29900 result_args.push(Expression::NamedArgument(Box::new(
29901 crate::expressions::NamedArgument {
29902 name: Identifier::new(&name),
29903 value,
29904 separator: sep,
29905 },
29906 )));
29907 }
29908 Ok(Expression::Function(Box::new(Function::new(
29909 "MAKE_INTERVAL".to_string(),
29910 result_args,
29911 ))))
29912 } else {
29913 Ok(Expression::Function(Box::new(Function::new(
29914 "MAKE_INTERVAL".to_string(),
29915 args,
29916 ))))
29917 }
29918 }
29919
29920 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
29921 "ARRAY_TO_STRING" if args.len() == 3 => {
29922 let arr = args.remove(0);
29923 let sep = args.remove(0);
29924 let null_text = args.remove(0);
29925 match target {
29926 DialectType::DuckDB => {
29927 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
29928 let _lambda_param =
29929 Expression::Identifier(crate::expressions::Identifier::new("x"));
29930 let coalesce =
29931 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
29932 original_name: None,
29933 expressions: vec![
29934 Expression::Identifier(crate::expressions::Identifier::new(
29935 "x",
29936 )),
29937 null_text,
29938 ],
29939 }));
29940 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29941 parameters: vec![crate::expressions::Identifier::new("x")],
29942 body: coalesce,
29943 colon: false,
29944 parameter_types: vec![],
29945 }));
29946 let list_transform = Expression::Function(Box::new(Function::new(
29947 "LIST_TRANSFORM".to_string(),
29948 vec![arr, lambda],
29949 )));
29950 Ok(Expression::Function(Box::new(Function::new(
29951 "ARRAY_TO_STRING".to_string(),
29952 vec![list_transform, sep],
29953 ))))
29954 }
29955 _ => Ok(Expression::Function(Box::new(Function::new(
29956 "ARRAY_TO_STRING".to_string(),
29957 vec![arr, sep, null_text],
29958 )))),
29959 }
29960 }
29961
29962 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
29963 "LENGTH" if args.len() == 1 => {
29964 let arg = args.remove(0);
29965 match target {
29966 DialectType::DuckDB => {
29967 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
29968 let typeof_func = Expression::Function(Box::new(Function::new(
29969 "TYPEOF".to_string(),
29970 vec![arg.clone()],
29971 )));
29972 let blob_cast = Expression::Cast(Box::new(Cast {
29973 this: arg.clone(),
29974 to: DataType::VarBinary { length: None },
29975 trailing_comments: vec![],
29976 double_colon_syntax: false,
29977 format: None,
29978 default: None,
29979 }));
29980 let octet_length = Expression::Function(Box::new(Function::new(
29981 "OCTET_LENGTH".to_string(),
29982 vec![blob_cast],
29983 )));
29984 let text_cast = Expression::Cast(Box::new(Cast {
29985 this: arg,
29986 to: DataType::Text,
29987 trailing_comments: vec![],
29988 double_colon_syntax: false,
29989 format: None,
29990 default: None,
29991 }));
29992 let length_text = Expression::Function(Box::new(Function::new(
29993 "LENGTH".to_string(),
29994 vec![text_cast],
29995 )));
29996 Ok(Expression::Case(Box::new(crate::expressions::Case {
29997 operand: Some(typeof_func),
29998 whens: vec![(
29999 Expression::Literal(Literal::String("BLOB".to_string())),
30000 octet_length,
30001 )],
30002 else_: Some(length_text),
30003 comments: Vec::new(),
30004 })))
30005 }
30006 _ => Ok(Expression::Function(Box::new(Function::new(
30007 "LENGTH".to_string(),
30008 vec![arg],
30009 )))),
30010 }
30011 }
30012
30013 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30014 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30015 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30016 // The args should be [x, fraction] with the null handling stripped
30017 // For DuckDB: QUANTILE_CONT(x, fraction)
30018 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30019 match target {
30020 DialectType::DuckDB => {
30021 // Strip down to just 2 args, rename to QUANTILE_CONT
30022 let x = args[0].clone();
30023 let frac = args[1].clone();
30024 Ok(Expression::Function(Box::new(Function::new(
30025 "QUANTILE_CONT".to_string(),
30026 vec![x, frac],
30027 ))))
30028 }
30029 _ => Ok(Expression::Function(Box::new(Function::new(
30030 "PERCENTILE_CONT".to_string(),
30031 args,
30032 )))),
30033 }
30034 }
30035
30036 // All others: pass through
30037 _ => Ok(Expression::Function(Box::new(Function {
30038 name: f.name,
30039 args,
30040 distinct: f.distinct,
30041 trailing_comments: f.trailing_comments,
30042 use_bracket_syntax: f.use_bracket_syntax,
30043 no_parens: f.no_parens,
30044 quoted: f.quoted,
30045 }))),
30046 }
30047 }
30048
30049 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30050 /// Returns false for column references and other non-literal expressions where the type is unknown.
30051 fn can_infer_presto_type(expr: &Expression) -> bool {
30052 match expr {
30053 Expression::Literal(_) => true,
30054 Expression::Boolean(_) => true,
30055 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30056 Expression::Struct(_) | Expression::StructFunc(_) => true,
30057 Expression::Function(f) => {
30058 let up = f.name.to_uppercase();
30059 up == "STRUCT"
30060 || up == "ROW"
30061 || up == "CURRENT_DATE"
30062 || up == "CURRENT_TIMESTAMP"
30063 || up == "NOW"
30064 }
30065 Expression::Cast(_) => true,
30066 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30067 _ => false,
30068 }
30069 }
30070
30071 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30072 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30073 use crate::expressions::Literal;
30074 match expr {
30075 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30076 Expression::Literal(Literal::Number(n)) => {
30077 if n.contains('.') {
30078 "DOUBLE".to_string()
30079 } else {
30080 "INTEGER".to_string()
30081 }
30082 }
30083 Expression::Boolean(_) => "BOOLEAN".to_string(),
30084 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30085 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30086 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30087 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30088 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30089 Expression::Function(f) => {
30090 let up = f.name.to_uppercase();
30091 if up == "STRUCT" || up == "ROW" {
30092 "ROW".to_string()
30093 } else if up == "CURRENT_DATE" {
30094 "DATE".to_string()
30095 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30096 "TIMESTAMP".to_string()
30097 } else {
30098 "VARCHAR".to_string()
30099 }
30100 }
30101 Expression::Cast(c) => {
30102 // If already cast, use the target type
30103 Self::data_type_to_presto_string(&c.to)
30104 }
30105 _ => "VARCHAR".to_string(),
30106 }
30107 }
30108
30109 /// Convert a DataType to its Presto/Trino string representation for ROW type
30110 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30111 use crate::expressions::DataType;
30112 match dt {
30113 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30114 "VARCHAR".to_string()
30115 }
30116 DataType::Int { .. }
30117 | DataType::BigInt { .. }
30118 | DataType::SmallInt { .. }
30119 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30120 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30121 DataType::Boolean => "BOOLEAN".to_string(),
30122 DataType::Date => "DATE".to_string(),
30123 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30124 DataType::Struct { fields, .. } => {
30125 let field_strs: Vec<String> = fields
30126 .iter()
30127 .map(|f| {
30128 format!(
30129 "{} {}",
30130 f.name,
30131 Self::data_type_to_presto_string(&f.data_type)
30132 )
30133 })
30134 .collect();
30135 format!("ROW({})", field_strs.join(", "))
30136 }
30137 DataType::Array { element_type, .. } => {
30138 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30139 }
30140 DataType::Custom { name } => {
30141 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30142 name.clone()
30143 }
30144 _ => "VARCHAR".to_string(),
30145 }
30146 }
30147
30148 /// Convert IntervalUnit to string
30149 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30150 match unit {
30151 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30152 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30153 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30154 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30155 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30156 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30157 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30158 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30159 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30160 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30161 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30162 }
30163 }
30164
30165 /// Extract unit string from an expression (uppercased)
30166 fn get_unit_str_static(expr: &Expression) -> String {
30167 use crate::expressions::Literal;
30168 match expr {
30169 Expression::Identifier(id) => id.name.to_uppercase(),
30170 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30171 Expression::Column(col) => col.name.name.to_uppercase(),
30172 Expression::Function(f) => {
30173 let base = f.name.to_uppercase();
30174 if !f.args.is_empty() {
30175 let inner = Self::get_unit_str_static(&f.args[0]);
30176 format!("{}({})", base, inner)
30177 } else {
30178 base
30179 }
30180 }
30181 _ => "DAY".to_string(),
30182 }
30183 }
30184
30185 /// Parse unit string to IntervalUnit
30186 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30187 match s {
30188 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30189 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30190 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30191 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30192 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30193 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30194 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30195 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30196 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30197 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30198 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30199 _ => crate::expressions::IntervalUnit::Day,
30200 }
30201 }
30202
30203 /// Convert expression to simple string for interval building
30204 fn expr_to_string_static(expr: &Expression) -> String {
30205 use crate::expressions::Literal;
30206 match expr {
30207 Expression::Literal(Literal::Number(s)) => s.clone(),
30208 Expression::Literal(Literal::String(s)) => s.clone(),
30209 Expression::Identifier(id) => id.name.clone(),
30210 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30211 _ => "1".to_string(),
30212 }
30213 }
30214
30215 /// Extract a simple string representation from a literal expression
30216 fn expr_to_string(expr: &Expression) -> String {
30217 use crate::expressions::Literal;
30218 match expr {
30219 Expression::Literal(Literal::Number(s)) => s.clone(),
30220 Expression::Literal(Literal::String(s)) => s.clone(),
30221 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30222 Expression::Identifier(id) => id.name.clone(),
30223 _ => "1".to_string(),
30224 }
30225 }
30226
30227 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30228 fn quote_interval_val(expr: &Expression) -> Expression {
30229 use crate::expressions::Literal;
30230 match expr {
30231 Expression::Literal(Literal::Number(n)) => {
30232 Expression::Literal(Literal::String(n.clone()))
30233 }
30234 Expression::Literal(Literal::String(_)) => expr.clone(),
30235 Expression::Neg(inner) => {
30236 if let Expression::Literal(Literal::Number(n)) = &inner.this {
30237 Expression::Literal(Literal::String(format!("-{}", n)))
30238 } else {
30239 expr.clone()
30240 }
30241 }
30242 _ => expr.clone(),
30243 }
30244 }
30245
30246 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
30247 fn timestamp_string_has_timezone(ts: &str) -> bool {
30248 let trimmed = ts.trim();
30249 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
30250 if let Some(last_space) = trimmed.rfind(' ') {
30251 let suffix = &trimmed[last_space + 1..];
30252 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
30253 let rest = &suffix[1..];
30254 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
30255 return true;
30256 }
30257 }
30258 }
30259 // Check for named timezone abbreviations
30260 let ts_lower = trimmed.to_lowercase();
30261 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
30262 for abbrev in &tz_abbrevs {
30263 if ts_lower.ends_with(abbrev) {
30264 return true;
30265 }
30266 }
30267 false
30268 }
30269
30270 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
30271 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
30272 use crate::expressions::{Cast, DataType, Literal};
30273 match expr {
30274 Expression::Literal(Literal::Timestamp(s)) => {
30275 let tz = func_name.starts_with("TIMESTAMP");
30276 Expression::Cast(Box::new(Cast {
30277 this: Expression::Literal(Literal::String(s)),
30278 to: if tz {
30279 DataType::Timestamp {
30280 timezone: true,
30281 precision: None,
30282 }
30283 } else {
30284 DataType::Timestamp {
30285 timezone: false,
30286 precision: None,
30287 }
30288 },
30289 trailing_comments: vec![],
30290 double_colon_syntax: false,
30291 format: None,
30292 default: None,
30293 }))
30294 }
30295 other => other,
30296 }
30297 }
30298
30299 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
30300 fn maybe_cast_ts(expr: Expression) -> Expression {
30301 use crate::expressions::{Cast, DataType, Literal};
30302 match expr {
30303 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30304 this: Expression::Literal(Literal::String(s)),
30305 to: DataType::Timestamp {
30306 timezone: false,
30307 precision: None,
30308 },
30309 trailing_comments: vec![],
30310 double_colon_syntax: false,
30311 format: None,
30312 default: None,
30313 })),
30314 other => other,
30315 }
30316 }
30317
30318 /// Convert DATE 'x' literal to CAST('x' AS DATE)
30319 fn date_literal_to_cast(expr: Expression) -> Expression {
30320 use crate::expressions::{Cast, DataType, Literal};
30321 match expr {
30322 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30323 this: Expression::Literal(Literal::String(s)),
30324 to: DataType::Date,
30325 trailing_comments: vec![],
30326 double_colon_syntax: false,
30327 format: None,
30328 default: None,
30329 })),
30330 other => other,
30331 }
30332 }
30333
30334 /// Ensure an expression that should be a date is CAST(... AS DATE).
30335 /// Handles both DATE literals and string literals that look like dates.
30336 fn ensure_cast_date(expr: Expression) -> Expression {
30337 use crate::expressions::{Cast, DataType, Literal};
30338 match expr {
30339 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30340 this: Expression::Literal(Literal::String(s)),
30341 to: DataType::Date,
30342 trailing_comments: vec![],
30343 double_colon_syntax: false,
30344 format: None,
30345 default: None,
30346 })),
30347 Expression::Literal(Literal::String(ref _s)) => {
30348 // String literal that should be a date -> CAST('s' AS DATE)
30349 Expression::Cast(Box::new(Cast {
30350 this: expr,
30351 to: DataType::Date,
30352 trailing_comments: vec![],
30353 double_colon_syntax: false,
30354 format: None,
30355 default: None,
30356 }))
30357 }
30358 // Already a CAST or other expression -> leave as-is
30359 other => other,
30360 }
30361 }
30362
30363 /// Force CAST(expr AS DATE) for any expression (not just literals)
30364 /// Skips if the expression is already a CAST to DATE
30365 fn force_cast_date(expr: Expression) -> Expression {
30366 use crate::expressions::{Cast, DataType};
30367 // If it's already a CAST to DATE, don't double-wrap
30368 if let Expression::Cast(ref c) = expr {
30369 if matches!(c.to, DataType::Date) {
30370 return expr;
30371 }
30372 }
30373 Expression::Cast(Box::new(Cast {
30374 this: expr,
30375 to: DataType::Date,
30376 trailing_comments: vec![],
30377 double_colon_syntax: false,
30378 format: None,
30379 default: None,
30380 }))
30381 }
30382
    /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
    /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
    /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
    /// NOTE: the leading underscore keeps the sentinel out of the normal
    /// function-name rewrite tables; it must never be emitted in final SQL.
    const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
30387
30388 fn ensure_to_date_preserved(expr: Expression) -> Expression {
30389 use crate::expressions::{Function, Literal};
30390 if matches!(expr, Expression::Literal(Literal::String(_))) {
30391 Expression::Function(Box::new(Function::new(
30392 Self::PRESERVED_TO_DATE.to_string(),
30393 vec![expr],
30394 )))
30395 } else {
30396 expr
30397 }
30398 }
30399
30400 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
30401 fn try_cast_date(expr: Expression) -> Expression {
30402 use crate::expressions::{Cast, DataType};
30403 Expression::TryCast(Box::new(Cast {
30404 this: expr,
30405 to: DataType::Date,
30406 trailing_comments: vec![],
30407 double_colon_syntax: false,
30408 format: None,
30409 default: None,
30410 }))
30411 }
30412
30413 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
30414 fn double_cast_timestamp_date(expr: Expression) -> Expression {
30415 use crate::expressions::{Cast, DataType};
30416 let inner = Expression::Cast(Box::new(Cast {
30417 this: expr,
30418 to: DataType::Timestamp {
30419 timezone: false,
30420 precision: None,
30421 },
30422 trailing_comments: vec![],
30423 double_colon_syntax: false,
30424 format: None,
30425 default: None,
30426 }));
30427 Expression::Cast(Box::new(Cast {
30428 this: inner,
30429 to: DataType::Date,
30430 trailing_comments: vec![],
30431 double_colon_syntax: false,
30432 format: None,
30433 default: None,
30434 }))
30435 }
30436
30437 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
30438 fn double_cast_datetime_date(expr: Expression) -> Expression {
30439 use crate::expressions::{Cast, DataType};
30440 let inner = Expression::Cast(Box::new(Cast {
30441 this: expr,
30442 to: DataType::Custom {
30443 name: "DATETIME".to_string(),
30444 },
30445 trailing_comments: vec![],
30446 double_colon_syntax: false,
30447 format: None,
30448 default: None,
30449 }));
30450 Expression::Cast(Box::new(Cast {
30451 this: inner,
30452 to: DataType::Date,
30453 trailing_comments: vec![],
30454 double_colon_syntax: false,
30455 format: None,
30456 default: None,
30457 }))
30458 }
30459
30460 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
30461 fn double_cast_datetime2_date(expr: Expression) -> Expression {
30462 use crate::expressions::{Cast, DataType};
30463 let inner = Expression::Cast(Box::new(Cast {
30464 this: expr,
30465 to: DataType::Custom {
30466 name: "DATETIME2".to_string(),
30467 },
30468 trailing_comments: vec![],
30469 double_colon_syntax: false,
30470 format: None,
30471 default: None,
30472 }));
30473 Expression::Cast(Box::new(Cast {
30474 this: inner,
30475 to: DataType::Date,
30476 trailing_comments: vec![],
30477 double_colon_syntax: false,
30478 format: None,
30479 default: None,
30480 }))
30481 }
30482
30483 /// Convert Hive/Java-style date format strings to C-style (strftime) format
30484 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
30485 fn hive_format_to_c_format(fmt: &str) -> String {
30486 let mut result = String::new();
30487 let chars: Vec<char> = fmt.chars().collect();
30488 let mut i = 0;
30489 while i < chars.len() {
30490 match chars[i] {
30491 'y' => {
30492 let mut count = 0;
30493 while i < chars.len() && chars[i] == 'y' {
30494 count += 1;
30495 i += 1;
30496 }
30497 if count >= 4 {
30498 result.push_str("%Y");
30499 } else if count == 2 {
30500 result.push_str("%y");
30501 } else {
30502 result.push_str("%Y");
30503 }
30504 }
30505 'M' => {
30506 let mut count = 0;
30507 while i < chars.len() && chars[i] == 'M' {
30508 count += 1;
30509 i += 1;
30510 }
30511 if count >= 3 {
30512 result.push_str("%b");
30513 } else if count == 2 {
30514 result.push_str("%m");
30515 } else {
30516 result.push_str("%m");
30517 }
30518 }
30519 'd' => {
30520 let mut _count = 0;
30521 while i < chars.len() && chars[i] == 'd' {
30522 _count += 1;
30523 i += 1;
30524 }
30525 result.push_str("%d");
30526 }
30527 'H' => {
30528 let mut _count = 0;
30529 while i < chars.len() && chars[i] == 'H' {
30530 _count += 1;
30531 i += 1;
30532 }
30533 result.push_str("%H");
30534 }
30535 'h' => {
30536 let mut _count = 0;
30537 while i < chars.len() && chars[i] == 'h' {
30538 _count += 1;
30539 i += 1;
30540 }
30541 result.push_str("%I");
30542 }
30543 'm' => {
30544 let mut _count = 0;
30545 while i < chars.len() && chars[i] == 'm' {
30546 _count += 1;
30547 i += 1;
30548 }
30549 result.push_str("%M");
30550 }
30551 's' => {
30552 let mut _count = 0;
30553 while i < chars.len() && chars[i] == 's' {
30554 _count += 1;
30555 i += 1;
30556 }
30557 result.push_str("%S");
30558 }
30559 'S' => {
30560 // Fractional seconds - skip
30561 while i < chars.len() && chars[i] == 'S' {
30562 i += 1;
30563 }
30564 result.push_str("%f");
30565 }
30566 'a' => {
30567 // AM/PM
30568 while i < chars.len() && chars[i] == 'a' {
30569 i += 1;
30570 }
30571 result.push_str("%p");
30572 }
30573 'E' => {
30574 let mut count = 0;
30575 while i < chars.len() && chars[i] == 'E' {
30576 count += 1;
30577 i += 1;
30578 }
30579 if count >= 4 {
30580 result.push_str("%A");
30581 } else {
30582 result.push_str("%a");
30583 }
30584 }
30585 '\'' => {
30586 // Quoted literal text - pass through the quotes and content
30587 result.push('\'');
30588 i += 1;
30589 while i < chars.len() && chars[i] != '\'' {
30590 result.push(chars[i]);
30591 i += 1;
30592 }
30593 if i < chars.len() {
30594 result.push('\'');
30595 i += 1;
30596 }
30597 }
30598 c => {
30599 result.push(c);
30600 i += 1;
30601 }
30602 }
30603 }
30604 result
30605 }
30606
30607 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
30608 fn hive_format_to_presto_format(fmt: &str) -> String {
30609 let c_fmt = Self::hive_format_to_c_format(fmt);
30610 // Presto uses %T for HH:MM:SS
30611 c_fmt.replace("%H:%M:%S", "%T")
30612 }
30613
30614 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
30615 fn ensure_cast_timestamp(expr: Expression) -> Expression {
30616 use crate::expressions::{Cast, DataType, Literal};
30617 match expr {
30618 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30619 this: Expression::Literal(Literal::String(s)),
30620 to: DataType::Timestamp {
30621 timezone: false,
30622 precision: None,
30623 },
30624 trailing_comments: vec![],
30625 double_colon_syntax: false,
30626 format: None,
30627 default: None,
30628 })),
30629 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30630 this: expr,
30631 to: DataType::Timestamp {
30632 timezone: false,
30633 precision: None,
30634 },
30635 trailing_comments: vec![],
30636 double_colon_syntax: false,
30637 format: None,
30638 default: None,
30639 })),
30640 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30641 this: Expression::Literal(Literal::String(s)),
30642 to: DataType::Timestamp {
30643 timezone: false,
30644 precision: None,
30645 },
30646 trailing_comments: vec![],
30647 double_colon_syntax: false,
30648 format: None,
30649 default: None,
30650 })),
30651 other => other,
30652 }
30653 }
30654
30655 /// Force CAST to TIMESTAMP for any expression (not just literals)
30656 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
30657 fn force_cast_timestamp(expr: Expression) -> Expression {
30658 use crate::expressions::{Cast, DataType};
30659 // Don't double-wrap if already a CAST to TIMESTAMP
30660 if let Expression::Cast(ref c) = expr {
30661 if matches!(c.to, DataType::Timestamp { .. }) {
30662 return expr;
30663 }
30664 }
30665 Expression::Cast(Box::new(Cast {
30666 this: expr,
30667 to: DataType::Timestamp {
30668 timezone: false,
30669 precision: None,
30670 },
30671 trailing_comments: vec![],
30672 double_colon_syntax: false,
30673 format: None,
30674 default: None,
30675 }))
30676 }
30677
30678 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
30679 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
30680 use crate::expressions::{Cast, DataType, Literal};
30681 match expr {
30682 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30683 this: Expression::Literal(Literal::String(s)),
30684 to: DataType::Timestamp {
30685 timezone: true,
30686 precision: None,
30687 },
30688 trailing_comments: vec![],
30689 double_colon_syntax: false,
30690 format: None,
30691 default: None,
30692 })),
30693 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30694 this: expr,
30695 to: DataType::Timestamp {
30696 timezone: true,
30697 precision: None,
30698 },
30699 trailing_comments: vec![],
30700 double_colon_syntax: false,
30701 format: None,
30702 default: None,
30703 })),
30704 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30705 this: Expression::Literal(Literal::String(s)),
30706 to: DataType::Timestamp {
30707 timezone: true,
30708 precision: None,
30709 },
30710 trailing_comments: vec![],
30711 double_colon_syntax: false,
30712 format: None,
30713 default: None,
30714 })),
30715 other => other,
30716 }
30717 }
30718
30719 /// Ensure expression is CAST to DATETIME (for BigQuery)
30720 fn ensure_cast_datetime(expr: Expression) -> Expression {
30721 use crate::expressions::{Cast, DataType, Literal};
30722 match expr {
30723 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30724 this: expr,
30725 to: DataType::Custom {
30726 name: "DATETIME".to_string(),
30727 },
30728 trailing_comments: vec![],
30729 double_colon_syntax: false,
30730 format: None,
30731 default: None,
30732 })),
30733 other => other,
30734 }
30735 }
30736
30737 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
30738 fn force_cast_datetime(expr: Expression) -> Expression {
30739 use crate::expressions::{Cast, DataType};
30740 if let Expression::Cast(ref c) = expr {
30741 if let DataType::Custom { ref name } = c.to {
30742 if name.eq_ignore_ascii_case("DATETIME") {
30743 return expr;
30744 }
30745 }
30746 }
30747 Expression::Cast(Box::new(Cast {
30748 this: expr,
30749 to: DataType::Custom {
30750 name: "DATETIME".to_string(),
30751 },
30752 trailing_comments: vec![],
30753 double_colon_syntax: false,
30754 format: None,
30755 default: None,
30756 }))
30757 }
30758
30759 /// Ensure expression is CAST to DATETIME2 (for TSQL)
30760 fn ensure_cast_datetime2(expr: Expression) -> Expression {
30761 use crate::expressions::{Cast, DataType, Literal};
30762 match expr {
30763 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30764 this: expr,
30765 to: DataType::Custom {
30766 name: "DATETIME2".to_string(),
30767 },
30768 trailing_comments: vec![],
30769 double_colon_syntax: false,
30770 format: None,
30771 default: None,
30772 })),
30773 other => other,
30774 }
30775 }
30776
30777 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
30778 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
30779 use crate::expressions::{Cast, DataType, Literal};
30780 match expr {
30781 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30782 this: Expression::Literal(Literal::String(s)),
30783 to: DataType::Timestamp {
30784 timezone: true,
30785 precision: None,
30786 },
30787 trailing_comments: vec![],
30788 double_colon_syntax: false,
30789 format: None,
30790 default: None,
30791 })),
30792 other => other,
30793 }
30794 }
30795
30796 /// Convert BigQuery format string to Snowflake format string
30797 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
30798 use crate::expressions::Literal;
30799 if let Expression::Literal(Literal::String(s)) = format_expr {
30800 let sf = s
30801 .replace("%Y", "yyyy")
30802 .replace("%m", "mm")
30803 .replace("%d", "DD")
30804 .replace("%H", "HH24")
30805 .replace("%M", "MI")
30806 .replace("%S", "SS")
30807 .replace("%b", "mon")
30808 .replace("%B", "Month")
30809 .replace("%e", "FMDD");
30810 Expression::Literal(Literal::String(sf))
30811 } else {
30812 format_expr.clone()
30813 }
30814 }
30815
30816 /// Convert BigQuery format string to DuckDB format string
30817 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
30818 use crate::expressions::Literal;
30819 if let Expression::Literal(Literal::String(s)) = format_expr {
30820 let duck = s
30821 .replace("%T", "%H:%M:%S")
30822 .replace("%F", "%Y-%m-%d")
30823 .replace("%D", "%m/%d/%y")
30824 .replace("%x", "%m/%d/%y")
30825 .replace("%c", "%a %b %-d %H:%M:%S %Y")
30826 .replace("%e", "%-d")
30827 .replace("%E6S", "%S.%f");
30828 Expression::Literal(Literal::String(duck))
30829 } else {
30830 format_expr.clone()
30831 }
30832 }
30833
30834 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
30835 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
30836 use crate::expressions::Literal;
30837 if let Expression::Literal(Literal::String(s)) = format_expr {
30838 // Replace format elements from longest to shortest to avoid partial matches
30839 let result = s
30840 .replace("YYYYMMDD", "%Y%m%d")
30841 .replace("YYYY", "%Y")
30842 .replace("YY", "%y")
30843 .replace("MONTH", "%B")
30844 .replace("MON", "%b")
30845 .replace("MM", "%m")
30846 .replace("DD", "%d")
30847 .replace("HH24", "%H")
30848 .replace("HH12", "%I")
30849 .replace("HH", "%I")
30850 .replace("MI", "%M")
30851 .replace("SSTZH", "%S%z")
30852 .replace("SS", "%S")
30853 .replace("TZH", "%z");
30854 Expression::Literal(Literal::String(result))
30855 } else {
30856 format_expr.clone()
30857 }
30858 }
30859
30860 /// Normalize BigQuery format strings for BQ->BQ output
30861 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
30862 use crate::expressions::Literal;
30863 if let Expression::Literal(Literal::String(s)) = format_expr {
30864 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
30865 Expression::Literal(Literal::String(norm))
30866 } else {
30867 format_expr.clone()
30868 }
30869 }
30870}
30871
30872#[cfg(test)]
30873mod tests {
30874 use super::*;
30875
30876 #[test]
30877 fn test_dialect_type_from_str() {
30878 assert_eq!(
30879 "postgres".parse::<DialectType>().unwrap(),
30880 DialectType::PostgreSQL
30881 );
30882 assert_eq!(
30883 "postgresql".parse::<DialectType>().unwrap(),
30884 DialectType::PostgreSQL
30885 );
30886 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
30887 assert_eq!(
30888 "bigquery".parse::<DialectType>().unwrap(),
30889 DialectType::BigQuery
30890 );
30891 }
30892
30893 #[test]
30894 fn test_basic_transpile() {
30895 let dialect = Dialect::get(DialectType::Generic);
30896 let result = dialect
30897 .transpile_to("SELECT 1", DialectType::PostgreSQL)
30898 .unwrap();
30899 assert_eq!(result.len(), 1);
30900 assert_eq!(result[0], "SELECT 1");
30901 }
30902
30903 #[test]
30904 fn test_function_transformation_mysql() {
30905 // NVL should be transformed to IFNULL in MySQL
30906 let dialect = Dialect::get(DialectType::Generic);
30907 let result = dialect
30908 .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
30909 .unwrap();
30910 assert_eq!(result[0], "SELECT IFNULL(a, b)");
30911 }
30912
30913 #[test]
30914 fn test_get_path_duckdb() {
30915 // Test: step by step
30916 let snowflake = Dialect::get(DialectType::Snowflake);
30917
30918 // Step 1: Parse and check what Snowflake produces as intermediate
30919 let result_sf_sf = snowflake
30920 .transpile_to(
30921 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
30922 DialectType::Snowflake,
30923 )
30924 .unwrap();
30925 eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
30926
30927 // Step 2: DuckDB target
30928 let result_sf_dk = snowflake
30929 .transpile_to(
30930 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
30931 DialectType::DuckDB,
30932 )
30933 .unwrap();
30934 eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
30935
30936 // Step 3: GET_PATH directly
30937 let result_gp = snowflake
30938 .transpile_to(
30939 "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
30940 DialectType::DuckDB,
30941 )
30942 .unwrap();
30943 eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
30944 }
30945
30946 #[test]
30947 fn test_function_transformation_postgres() {
30948 // IFNULL should be transformed to COALESCE in PostgreSQL
30949 let dialect = Dialect::get(DialectType::Generic);
30950 let result = dialect
30951 .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
30952 .unwrap();
30953 assert_eq!(result[0], "SELECT COALESCE(a, b)");
30954
30955 // NVL should also be transformed to COALESCE
30956 let result = dialect
30957 .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
30958 .unwrap();
30959 assert_eq!(result[0], "SELECT COALESCE(a, b)");
30960 }
30961
30962 #[test]
30963 fn test_hive_cast_to_trycast() {
30964 // Hive CAST should become TRY_CAST for targets that support it
30965 let hive = Dialect::get(DialectType::Hive);
30966 let result = hive
30967 .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
30968 .unwrap();
30969 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
30970
30971 let result = hive
30972 .transpile_to("CAST(1 AS INT)", DialectType::Presto)
30973 .unwrap();
30974 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
30975 }
30976
30977 #[test]
30978 fn test_hive_array_identity() {
30979 // Hive ARRAY<DATE> should preserve angle bracket syntax
30980 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
30981 let hive = Dialect::get(DialectType::Hive);
30982
30983 // Test via transpile_to (this works)
30984 let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
30985 eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
30986 assert!(
30987 result[0].contains("ARRAY<DATE>"),
30988 "transpile_to: Expected ARRAY<DATE>, got: {}",
30989 result[0]
30990 );
30991
30992 // Test via parse -> transform -> generate (identity test path)
30993 let ast = hive.parse(sql).unwrap();
30994 let transformed = hive.transform(ast[0].clone()).unwrap();
30995 let output = hive.generate(&transformed).unwrap();
30996 eprintln!("Hive ARRAY via identity path: {}", output);
30997 assert!(
30998 output.contains("ARRAY<DATE>"),
30999 "identity path: Expected ARRAY<DATE>, got: {}",
31000 output
31001 );
31002 }
31003
31004 #[test]
31005 fn test_starrocks_delete_between_expansion() {
31006 // StarRocks doesn't support BETWEEN in DELETE statements
31007 let dialect = Dialect::get(DialectType::Generic);
31008
31009 // BETWEEN should be expanded to >= AND <= in DELETE
31010 let result = dialect
31011 .transpile_to(
31012 "DELETE FROM t WHERE a BETWEEN b AND c",
31013 DialectType::StarRocks,
31014 )
31015 .unwrap();
31016 assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");
31017
31018 // NOT BETWEEN should be expanded to < OR > in DELETE
31019 let result = dialect
31020 .transpile_to(
31021 "DELETE FROM t WHERE a NOT BETWEEN b AND c",
31022 DialectType::StarRocks,
31023 )
31024 .unwrap();
31025 assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");
31026
31027 // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
31028 let result = dialect
31029 .transpile_to(
31030 "SELECT * FROM t WHERE a BETWEEN b AND c",
31031 DialectType::StarRocks,
31032 )
31033 .unwrap();
31034 assert!(
31035 result[0].contains("BETWEEN"),
31036 "BETWEEN should be preserved in SELECT"
31037 );
31038 }
31039
31040 #[test]
31041 fn test_snowflake_ltrim_rtrim_parse() {
31042 let sf = Dialect::get(DialectType::Snowflake);
31043 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
31044 let result = sf.transpile_to(sql, DialectType::DuckDB);
31045 match &result {
31046 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
31047 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
31048 }
31049 assert!(
31050 result.is_ok(),
31051 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
31052 result.err()
31053 );
31054 }
31055
31056 #[test]
31057 fn test_duckdb_count_if_parse() {
31058 let duck = Dialect::get(DialectType::DuckDB);
31059 let sql = "COUNT_IF(x)";
31060 let result = duck.transpile_to(sql, DialectType::DuckDB);
31061 match &result {
31062 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
31063 Err(e) => eprintln!("COUNT_IF error: {}", e),
31064 }
31065 assert!(
31066 result.is_ok(),
31067 "Expected successful parse of COUNT_IF(x), got error: {:?}",
31068 result.err()
31069 );
31070 }
31071
31072 #[test]
31073 fn test_tsql_cast_tinyint_parse() {
31074 let tsql = Dialect::get(DialectType::TSQL);
31075 let sql = "CAST(X AS TINYINT)";
31076 let result = tsql.transpile_to(sql, DialectType::DuckDB);
31077 match &result {
31078 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
31079 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
31080 }
31081 assert!(
31082 result.is_ok(),
31083 "Expected successful transpile, got error: {:?}",
31084 result.err()
31085 );
31086 }
31087
31088 #[test]
31089 fn test_pg_hash_bitwise_xor() {
31090 let dialect = Dialect::get(DialectType::PostgreSQL);
31091 let result = dialect
31092 .transpile_to("x # y", DialectType::PostgreSQL)
31093 .unwrap();
31094 assert_eq!(result[0], "x # y");
31095 }
31096
31097 #[test]
31098 fn test_pg_array_to_duckdb() {
31099 let dialect = Dialect::get(DialectType::PostgreSQL);
31100 let result = dialect
31101 .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
31102 .unwrap();
31103 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
31104 }
31105
31106 #[test]
31107 fn test_array_remove_bigquery() {
31108 let dialect = Dialect::get(DialectType::Generic);
31109 let result = dialect
31110 .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
31111 .unwrap();
31112 assert_eq!(
31113 result[0],
31114 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
31115 );
31116 }
31117
31118 #[test]
31119 fn test_map_clickhouse_case() {
31120 let dialect = Dialect::get(DialectType::Generic);
31121 let parsed = dialect
31122 .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
31123 .unwrap();
31124 eprintln!("MAP parsed: {:?}", parsed);
31125 let result = dialect
31126 .transpile_to(
31127 "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
31128 DialectType::ClickHouse,
31129 )
31130 .unwrap();
31131 eprintln!("MAP result: {}", result[0]);
31132 }
31133
31134 #[test]
31135 fn test_generate_date_array_presto() {
31136 let dialect = Dialect::get(DialectType::Generic);
31137 let result = dialect.transpile_to(
31138 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31139 DialectType::Presto,
31140 ).unwrap();
31141 eprintln!("GDA -> Presto: {}", result[0]);
31142 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
31143 }
31144
31145 #[test]
31146 fn test_generate_date_array_postgres() {
31147 let dialect = Dialect::get(DialectType::Generic);
31148 let result = dialect.transpile_to(
31149 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31150 DialectType::PostgreSQL,
31151 ).unwrap();
31152 eprintln!("GDA -> PostgreSQL: {}", result[0]);
31153 }
31154
31155 #[test]
31156 fn test_generate_date_array_snowflake() {
31157 let dialect = Dialect::get(DialectType::Generic);
31158 let result = dialect.transpile_to(
31159 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31160 DialectType::Snowflake,
31161 ).unwrap();
31162 eprintln!("GDA -> Snowflake: {}", result[0]);
31163 }
31164
31165 #[test]
31166 fn test_array_length_generate_date_array_snowflake() {
31167 let dialect = Dialect::get(DialectType::Generic);
31168 let result = dialect.transpile_to(
31169 "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31170 DialectType::Snowflake,
31171 ).unwrap();
31172 eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
31173 }
31174
31175 #[test]
31176 fn test_generate_date_array_mysql() {
31177 let dialect = Dialect::get(DialectType::Generic);
31178 let result = dialect.transpile_to(
31179 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31180 DialectType::MySQL,
31181 ).unwrap();
31182 eprintln!("GDA -> MySQL: {}", result[0]);
31183 }
31184
31185 #[test]
31186 fn test_generate_date_array_redshift() {
31187 let dialect = Dialect::get(DialectType::Generic);
31188 let result = dialect.transpile_to(
31189 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31190 DialectType::Redshift,
31191 ).unwrap();
31192 eprintln!("GDA -> Redshift: {}", result[0]);
31193 }
31194
31195 #[test]
31196 fn test_generate_date_array_tsql() {
31197 let dialect = Dialect::get(DialectType::Generic);
31198 let result = dialect.transpile_to(
31199 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31200 DialectType::TSQL,
31201 ).unwrap();
31202 eprintln!("GDA -> TSQL: {}", result[0]);
31203 }
31204
31205 #[test]
31206 fn test_struct_colon_syntax() {
31207 let dialect = Dialect::get(DialectType::Generic);
31208 // Test without colon first
31209 let result = dialect.transpile_to(
31210 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
31211 DialectType::ClickHouse,
31212 );
31213 match result {
31214 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
31215 Err(e) => eprintln!("STRUCT no colon error: {}", e),
31216 }
31217 // Now test with colon
31218 let result = dialect.transpile_to(
31219 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
31220 DialectType::ClickHouse,
31221 );
31222 match result {
31223 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
31224 Err(e) => eprintln!("STRUCT colon error: {}", e),
31225 }
31226 }
31227
31228 #[test]
31229 fn test_generate_date_array_cte_wrapped_mysql() {
31230 let dialect = Dialect::get(DialectType::Generic);
31231 let result = dialect.transpile_to(
31232 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
31233 DialectType::MySQL,
31234 ).unwrap();
31235 eprintln!("GDA CTE -> MySQL: {}", result[0]);
31236 }
31237
31238 #[test]
31239 fn test_generate_date_array_cte_wrapped_tsql() {
31240 let dialect = Dialect::get(DialectType::Generic);
31241 let result = dialect.transpile_to(
31242 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
31243 DialectType::TSQL,
31244 ).unwrap();
31245 eprintln!("GDA CTE -> TSQL: {}", result[0]);
31246 }
31247}